diff options
Diffstat (limited to 'llvm/lib/CodeGen')
27 files changed, 536 insertions, 154 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 3b3e7a4..dcfd9aa 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -2083,22 +2083,55 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (TBB == FBB) { MBB->splice(Loc, TBB, TBB->begin(), TIB); } else { + // Merge the debug locations, and hoist and kill the debug instructions from + // both branches. FIXME: We could probably try harder to preserve some debug + // instructions (but at least this isn't producing wrong locations). + MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc); + auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) { + assert(DI->isDebugInstr() && "Expected a debug instruction"); + if (DI->isDebugRef()) { + const TargetInstrInfo *TII = + MBB->getParent()->getSubtarget().getInstrInfo(); + const MCInstrDesc &DBGV = TII->get(TargetOpcode::DBG_VALUE); + DI = BuildMI(*MBB->getParent(), DI->getDebugLoc(), DBGV, false, 0, + DI->getDebugVariable(), DI->getDebugExpression()); + MBB->insert(Loc, &*DI); + return; + } + // Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF. + if (DI->isDebugPHI()) { + DI->eraseFromParent(); + return; + } + // Move DBG_LABELs without modifying them. Set DBG_VALUEs undef. + if (!DI->isDebugLabel()) + DI->setDebugValueUndef(); + DI->moveBefore(&*Loc); + }; + // TIB and FIB point to the end of the regions to hoist/merge in TBB and // FBB. MachineBasicBlock::iterator FE = FIB; MachineBasicBlock::iterator FI = FBB->begin(); for (MachineBasicBlock::iterator TI : make_early_inc_range(make_range(TBB->begin(), TIB))) { - // Move debug instructions and pseudo probes without modifying them. - // FIXME: This is the wrong thing to do for debug locations, which - // should at least be killed (and hoisted from BOTH blocks). - if (TI->isDebugOrPseudoInstr()) { - TI->moveBefore(&*Loc); + // Hoist and kill debug instructions from FBB. After this loop FI points + // to the next non-debug instruction to hoist (checked in assert after the + // TBB debug instruction handling code). + while (FI != FE && FI->isDebugInstr()) + HoistAndKillDbgInstr(FI++); + + // Kill debug instructions before moving. + if (TI->isDebugInstr()) { + HoistAndKillDbgInstr(TI); continue; } - // Get the next non-meta instruction in FBB. - FI = skipDebugInstructionsForward(FI, FE, false); + // FI and TI now point to identical non-debug instructions. + assert(FI != FE && "Unexpected end of FBB range"); + // Pseudo probes are excluded from the range when identifying foldable + // instructions, so we don't expect to see one now. + assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range"); // NOTE: The loop above checks CheckKillDead but we can't do that here as // it modifies some kill markers after the check. assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) && @@ -2111,6 +2144,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { ++FI; } } + FBB->erase(FBB->begin(), FIB); if (UpdateLiveIns) diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 9512f79..810dc29 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -101,6 +101,7 @@ CGOPT(EABI, EABIVersion) CGOPT(DebuggerKind, DebuggerTuningOpt) CGOPT(bool, EnableStackSizeSection) CGOPT(bool, EnableAddrsig) +CGOPT(bool, EnableCallGraphSection) CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableStaticDataPartitioning) @@ -461,6 +462,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(EnableAddrsig); + static cl::opt<bool> EnableCallGraphSection( + "call-graph-section", cl::desc("Emit a call graph section"), + cl::init(false)); + CGBINDOPT(EnableCallGraphSection); + static cl::opt<bool> EmitCallSiteInfo( "emit-call-site-info", cl::desc( @@ -595,6 +601,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter(); Options.EnableStaticDataPartitioning = getEnableStaticDataPartitioning(); Options.EmitAddrsig = getEnableAddrsig(); + Options.EmitCallGraphSection = getEnableCallGraphSection(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 8855740f..9b2851e 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -2186,19 +2186,16 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, llvm_unreachable("Deinterleave node should already have ReplacementNode"); break; case ComplexDeinterleavingOperation::Splat: { - auto *NewTy = VectorType::getDoubleElementsVectorType( - cast<VectorType>(Node->Real->getType())); auto *R = dyn_cast<Instruction>(Node->Real); auto *I = dyn_cast<Instruction>(Node->Imag); if (R && I) { // Splats that are not constant are interleaved where they are located Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode(); IRBuilder<> IRB(InsertPoint); - ReplacementNode = IRB.CreateIntrinsic(Intrinsic::vector_interleave2, - NewTy, {Node->Real, Node->Imag}); + ReplacementNode = IRB.CreateVectorInterleave({Node->Real, Node->Imag}); } else { - ReplacementNode = Builder.CreateIntrinsic( - Intrinsic::vector_interleave2, NewTy, {Node->Real, Node->Imag}); + ReplacementNode = + Builder.CreateVectorInterleave({Node->Real, Node->Imag}); } break; } @@ -2226,10 +2223,7 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, auto *MaskImag = cast<Instruction>(Node->Imag)->getOperand(0); auto *A = replaceNode(Builder, Node->Operands[0]); auto *B = replaceNode(Builder, Node->Operands[1]); - auto *NewMaskTy = VectorType::getDoubleElementsVectorType( - cast<VectorType>(MaskReal->getType())); - auto *NewMask = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, - NewMaskTy, {MaskReal, MaskImag}); + auto *NewMask = Builder.CreateVectorInterleave({MaskReal, MaskImag}); ReplacementNode = Builder.CreateSelect(NewMask, A, B); break; } @@ -2260,8 +2254,8 @@ void ComplexDeinterleavingGraph::processReductionSingle( } if (!NewInit) - NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy, - {Init, Constant::getNullValue(VTy)}); + NewInit = + Builder.CreateVectorInterleave({Init, Constant::getNullValue(VTy)}); NewPHI->addIncoming(NewInit, Incoming); NewPHI->addIncoming(OperationReplacement, BackEdge); @@ -2281,16 +2275,12 @@ void ComplexDeinterleavingGraph::processReductionOperation( auto *OldPHIImag = ReductionInfo[Imag].first; auto *NewPHI = OldToNewPHI[OldPHIReal]; - auto *VTy = cast<VectorType>(Real->getType()); - auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); - // We have to interleave initial origin values coming from IncomingBlock Value *InitReal = OldPHIReal->getIncomingValueForBlock(Incoming); Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming); IRBuilder<> Builder(Incoming->getTerminator()); - auto *NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy, - {InitReal, InitImag}); + auto *NewInit = Builder.CreateVectorInterleave({InitReal, InitImag}); NewPHI->addIncoming(NewInit, Incoming); NewPHI->addIncoming(OperationReplacement, BackEdge); diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 012d873..9ba1782 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -1009,7 +1009,8 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, for (unsigned I = 0; I < NumValues; ++I) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + MIRBuilder.materializeObjectPtrOffset(Addr, DemoteReg, OffsetLLTy, + Offsets[I]); auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, MRI.getType(VRegs[I]), commonAlignment(BaseAlign, Offsets[I])); @@ -1039,7 +1040,8 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, for (unsigned I = 0; I < NumValues; ++I) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + MIRBuilder.materializeObjectPtrOffset(Addr, DemoteReg, OffsetLLTy, + Offsets[I]); auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, MRI.getType(VRegs[I]), commonAlignment(BaseAlign, Offsets[I])); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index e8f513a..e84ba91 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5949,8 +5949,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, const TargetOptions &Options = MF->getTarget().Options; LLT DstType = MRI.getType(MI.getOperand(0).getReg()); - if (CanReassociate && - !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc))) + if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc)) return false; // Floating-point multiply-add with intermediate rounding. @@ -5962,8 +5961,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, if (!HasFMAD && !HasFMA) return false; - AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD; + AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD; // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract)) return false; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index dc5dfab..fd38c30 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1409,7 +1409,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializeObjectPtrOffset(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); Align BaseAlign = getMemOpAlign(LI); @@ -1448,7 +1448,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializeObjectPtrOffset(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); Align BaseAlign = getMemOpAlign(SI); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ed7b07f..d9d3569 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4170,7 +4170,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); + auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst); auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO); @@ -4277,8 +4277,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { LLT PtrTy = MRI.getType(PtrReg); auto OffsetCst = MIRBuilder.buildConstant( LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst); + auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst); MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); @@ -5349,7 +5348,8 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, unsigned ByteOffset = Offset / 8; Register NewAddrReg; - MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset); + MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy, + ByteOffset); MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, ByteOffset, PartTy); @@ -8004,7 +8004,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly. return UnableToLegalize; - if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) { + if (MI.getFlag(MachineInstr::FmAfn)) { unsigned Flags = MI.getFlags(); auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags); MIRBuilder.buildFPTrunc(Dst, Src32, Flags); @@ -9822,7 +9822,7 @@ LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val, if (DstOff != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); - Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0); } MIB.buildStore(Value, Ptr, *StoreMMO); @@ -9962,7 +9962,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, LLT SrcTy = MRI.getType(Src); Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); @@ -9970,7 +9970,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, Register StorePtr = Dst; if (CurrOffset != 0) { LLT DstTy = MRI.getType(Dst); - StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); @@ -10060,7 +10060,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, LLT SrcTy = MRI.getType(Src); auto Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -10078,7 +10078,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, LLT DstTy = MRI.getType(Dst); auto Offset = MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 121d7e8..27df7e3 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -208,11 +208,20 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags); } +MachineInstrBuilder MachineIRBuilder::buildObjectPtrOffset(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { + return buildPtrAdd(Res, Op0, Op1, + MachineInstr::MIFlag::NoUWrap | + MachineInstr::MIFlag::InBounds); +} + std::optional<MachineInstrBuilder> MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, - const LLT ValueTy, uint64_t Value) { + const LLT ValueTy, uint64_t Value, + std::optional<unsigned> Flags) { assert(Res == 0 && "Res is a result argument"); - assert(ValueTy.isScalar() && "invalid offset type"); + assert(ValueTy.isScalar() && "invalid offset type"); if (Value == 0) { Res = Op0; @@ -221,7 +230,14 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); auto Cst = buildConstant(ValueTy, Value); - return buildPtrAdd(Res, Op0, Cst.getReg(0)); + return buildPtrAdd(Res, Op0, Cst.getReg(0), Flags); +} + +std::optional<MachineInstrBuilder> MachineIRBuilder::materializeObjectPtrOffset( + Register &Res, Register Op0, const LLT ValueTy, uint64_t Value) { + return materializePtrAdd(Res, Op0, ValueTy, Value, + MachineInstr::MIFlag::NoUWrap | + MachineInstr::MIFlag::InBounds); } MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 7153902..8b72c29 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -217,6 +217,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) .Case("samesign", MIToken::kw_samesign) + .Case("inbounds", MIToken::kw_inbounds) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) @@ -616,6 +617,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { .Case("!range", MIToken::md_range) .Case("!DIExpression", MIToken::md_diexpr) .Case("!DILocation", MIToken::md_dilocation) + .Case("!noalias.addrspace", MIToken::md_noalias_addrspace) .Default(MIToken::Error); } diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index d7cd067..0627f17 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -78,6 +78,7 @@ struct MIToken { kw_nneg, kw_disjoint, kw_samesign, + kw_inbounds, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, @@ -151,6 +152,7 @@ struct MIToken { md_tbaa, md_alias_scope, md_noalias, + md_noalias_addrspace, md_range, md_diexpr, md_dilocation, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 3a364d5..6a464d9 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1477,7 +1477,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_nneg) || Token.is(MIToken::kw_disjoint) || Token.is(MIToken::kw_nusw) || - Token.is(MIToken::kw_samesign)) { + Token.is(MIToken::kw_samesign) || + Token.is(MIToken::kw_inbounds)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1518,6 +1519,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NoUSWrap; if (Token.is(MIToken::kw_samesign)) Flags |= MachineInstr::SameSign; + if (Token.is(MIToken::kw_inbounds)) + Flags |= MachineInstr::InBounds; lex(); } @@ -3482,6 +3485,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseMDNode(AAInfo.NoAlias)) return true; break; + case MIToken::md_noalias_addrspace: + lex(); + if (parseMDNode(AAInfo.NoAliasAddrSpace)) + return true; + break; case MIToken::md_range: lex(); if (parseMDNode(Range)) @@ -3490,7 +3498,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { // TODO: Report an error on duplicate metadata nodes. default: return error("expected 'align' or '!tbaa' or '!alias.scope' or " - "'!noalias' or '!range'"); + "'!noalias' or '!range' or '!noalias.addrspace'"); } } if (expectAndConsume(MIToken::rparen)) diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 1e9fcf3..3e99e57 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -504,13 +504,21 @@ bool MIRParserImpl::initializeCallSiteInfo( return error(Error, ArgRegPair.Reg.SourceRange); CSInfo.ArgRegPairs.emplace_back(Reg, ArgRegPair.ArgNo); } + if (!YamlCSInfo.CalleeTypeIds.empty()) { + for (auto CalleeTypeId : YamlCSInfo.CalleeTypeIds) { + IntegerType *Int64Ty = Type::getInt64Ty(Context); + CSInfo.CalleeTypeIds.push_back(ConstantInt::get(Int64Ty, CalleeTypeId, + /*isSigned=*/false)); + } + } - if (TM.Options.EmitCallSiteInfo) + if (TM.Options.EmitCallSiteInfo || TM.Options.EmitCallGraphSection) MF.addCallSiteInfo(&*CallI, std::move(CSInfo)); } - if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo) - return error(Twine("Call site info provided but not used")); + if (!YamlMF.CallSitesInfo.empty() && + !(TM.Options.EmitCallSiteInfo || TM.Options.EmitCallGraphSection)) + return error("call site info provided but not used"); return false; } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index bc4e299..ce1834a 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -525,24 +525,30 @@ static void convertCallSiteObjects(yaml::MachineFunction &YMF, const MachineFunction &MF, ModuleSlotTracker &MST) { const auto *TRI = MF.getSubtarget().getRegisterInfo(); - for (auto CSInfo : MF.getCallSitesInfo()) { + for (auto [MI, CallSiteInfo] : MF.getCallSitesInfo()) { yaml::CallSiteInfo YmlCS; yaml::MachineInstrLoc CallLocation; // Prepare instruction position. - MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator(); + MachineBasicBlock::const_instr_iterator CallI = MI->getIterator(); CallLocation.BlockNum = CallI->getParent()->getNumber(); // Get call instruction offset from the beginning of block. CallLocation.Offset = std::distance(CallI->getParent()->instr_begin(), CallI); YmlCS.CallLocation = CallLocation; + + auto [ArgRegPairs, CalleeTypeIds] = CallSiteInfo; // Construct call arguments and theirs forwarding register info. - for (auto ArgReg : CSInfo.second.ArgRegPairs) { + for (auto ArgReg : ArgRegPairs) { yaml::CallSiteInfo::ArgRegPair YmlArgReg; YmlArgReg.ArgNo = ArgReg.ArgNo; printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI); YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg); } + // Get type ids. + for (auto *CalleeTypeId : CalleeTypeIds) { + YmlCS.CalleeTypeIds.push_back(CalleeTypeId->getZExtValue()); + } YMF.CallSitesInfo.push_back(std::move(YmlCS)); } @@ -814,6 +820,8 @@ static void printMI(raw_ostream &OS, MFPrintState &State, OS << "nusw "; if (MI.getFlag(MachineInstr::SameSign)) OS << "samesign "; + if (MI.getFlag(MachineInstr::InBounds)) + OS << "inbounds "; // NOTE: Please add new MIFlags also to the MI_FLAGS_STR in // llvm/utils/update_mir_test_checks.py. diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 429a17a..60d42e0 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -211,8 +211,7 @@ void MachineFunction::init() { ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); - // FIXME: Use Function::hasOptSize(). - if (!F.getAlign() && !F.hasFnAttribute(Attribute::OptimizeForSize)) + if (!F.getAlign() && !F.hasOptSize()) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -920,7 +919,7 @@ MachineFunction::getCallSiteInfo(const MachineInstr *MI) { assert(MI->isCandidateForAdditionalCallInfo() && "Call site info refers only to call (MI) candidates"); - if (!Target.Options.EmitCallSiteInfo) + if (!Target.Options.EmitCallSiteInfo && !Target.Options.EmitCallGraphSection) return CallSitesInfo.end(); return CallSitesInfo.find(MI); } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index da3665b..79047f7 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -585,6 +585,8 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::NoUSWrap; if (GEP->hasNoUnsignedWrap()) MIFlags |= MachineInstr::MIFlag::NoUWrap; + if (GEP->isInBounds()) + MIFlags |= MachineInstr::MIFlag::InBounds; } // Copy the nonneg flag. @@ -1860,8 +1862,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (getFlag(MachineInstr::NoUSWrap)) + OS << "nusw "; if (getFlag(MachineInstr::SameSign)) OS << "samesign "; + if (getFlag(MachineInstr::InBounds)) + OS << "inbounds "; // Print the opcode name. if (TII) diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 0d25169..c612f8de 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1273,6 +1273,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << ", !noalias "; AAInfo.NoAlias->printAsOperand(OS, MST); } + if (AAInfo.NoAliasAddrSpace) { + OS << ", !noalias.addrspace "; + AAInfo.NoAliasAddrSpace->printAsOperand(OS, MST); + } if (getRanges()) { OS << ", !range "; getRanges()->printAsOperand(OS, MST); diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 0f742c4..21bf052 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -423,7 +423,7 @@ void ModuloScheduleExpander::generateExistingPhis( // potentially define two values. unsigned MaxPhis = PrologStage + 2; if (!InKernel && (int)PrologStage <= LoopValStage) - MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); + MaxPhis = std::max((int)MaxPhis - LoopValStage, 1); unsigned NumPhis = std::min(NumStages, MaxPhis); Register NewReg; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 2d7987a..7ede564 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p SubregToRegSrcInst is not empty, we are coalescing a + /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an + /// implicit-def of DstReg on instructions that define SrcReg. + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, + ArrayRef<MachineInstr *> SubregToRegSrcInst = {}); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. + LaneBitmask NewMIImplicitOpsMask; SmallVector<MachineOperand, 4> ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); + if (MO.isDef() && MO.getReg().isVirtual() && + MRI->shouldTrackSubRegLiveness(DstReg)) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { assert(MO.getReg() == NewMI.getOperand(0).getReg()); - // We're only expecting another def of the main output, so the range - // should get updated with the regular output range. - // - // FIXME: The range updating below probably needs updating to look at - // the super register if subranges are tracked. - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "subrange update for implicit-def of super register may not be " - "properly handled"); + // If lanemasks need to be tracked, compile the lanemask of the NewMI + // implicit def operands to avoid subranges for the super-regs from + // being removed by code later on in this function. + if (MRI->shouldTrackSubRegLiveness(MO.getReg())) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } } @@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { - if ((SR.LaneMask & DstMask).none()) { + if ((SR.LaneMask & DstMask).none() && + (SR.LaneMask & NewMIImplicitOpsMask).none()) { LLVM_DEBUG(dbgs() << "Removing undefined SubRange " << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); @@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx) { +void RegisterCoalescer::updateRegDefsUses( + Register SrcReg, Register DstReg, unsigned SubIdx, + ArrayRef<MachineInstr *> SubregToRegSrcInsts) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); + // Coalescing a COPY may expose reads of 'undef' subregisters. + // If so, then explicitly propagate 'undef' to those operands. if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { for (MachineOperand &MO : MRI->reg_operands(DstReg)) { if (MO.isUndef()) @@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, } } + // If DstInt already has a subrange for the unused lanes, then we shouldn't + // create duplicate subranges when we update the interval for unused lanes. + LaneBitmask DstIntLaneMask; + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + for (LiveInterval::SubRange &SR : DstInt->subranges()) + DstIntLaneMask |= SR.LaneMask; + } + + // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'. SmallPtrSet<MachineInstr *, 8> Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); + bool RequiresImplicitRedef = false; + if (!SubregToRegSrcInsts.empty()) { + // We can only add an implicit-def and undef if the sub registers match, + // e.g. + // %0:gr32 = INSTX + // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined + // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32 + // + // This cannot be transformed into: + // %1.sub32:gr64 = INSTX + // undef %1.sub8:gr64 = INSTY , implicit-def %1 + // + // Because that would thrash the top 24 bits of %1.sub32. + if (is_contained(SubregToRegSrcInsts, UseMI) && + all_of(UseMI->defs(), + [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool { + if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef()) + return true; + return SubIdx == MO.getSubReg(); + })) { + // Add implicit-def of super-register to express that the whole + // register is defined by the instruction. + MachineInstrBuilder MIB(*MF, UseMI); + MIB.addReg(DstReg, RegState::ImplicitDefine); + RequiresImplicitRedef = true; + } + + // If the coalesed instruction doesn't fully define the register, we need + // to preserve the original super register liveness for SUBREG_TO_REG. + // + // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, + // but it introduces liveness for other subregisters. Downstream users may + // have been relying on those bits, so we need to ensure their liveness is + // captured with a def of other lanes. + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + // First check if there is sufficient granularity in terms of subranges. + LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = DstMask & ~UsedLanes; + if ((UnusedLanes & ~DstIntLaneMask).any()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); + DstIntLaneMask |= UnusedLanes; + } + + // After duplicating the live ranges for the low/hi bits, we + // need to update the subranges of the DstReg interval such that + // for a case like this: + // + // entry: + // 16B %1:gpr32 = INSTRUCTION (<=> UseMI) + // : + // if.then: + // 32B %1:gpr32 = MOVIMM32 .. + // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32 + // + // Only the MOVIMM32 require a def of the top lanes and any intervals + // for the top 32-bits of the def at 16B should be removed. + for (LiveInterval::SubRange &SR : DstInt->subranges()) { + if (!Writes || RequiresImplicitRedef || + (SR.LaneMask & UnusedLanes).none()) + continue; + + assert((SR.LaneMask & UnusedLanes) == SR.LaneMask && + "Unexpected lanemask. Subrange needs finer granularity"); + + SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false); + auto SegmentI = SR.find(UseIdx); + if (SegmentI != SR.end()) + SR.removeSegment(SegmentI, true); + } + } + } + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned Op : Ops) { MachineOperand &MO = UseMI->getOperand(Op); @@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // turn a full def into a read-modify-write sub-register def and vice // versa. if (SubIdx && MO.isDef()) - MO.setIsUndef(!Reads); + MO.setIsUndef(!Reads || RequiresImplicitRedef); // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. @@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, LIS->shrinkToUses(&LI); } +/// For a given use of value \p Idx, it returns the def in the current block, +/// or otherwise all possible defs in preceding blocks. +static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks, + SmallVector<MachineInstr *> &Instrs, + LiveIntervals *LIS, LiveInterval &SrcInt, + MachineBasicBlock *MBB, VNInfo *Idx) { + if (!Idx->isPHIDef()) { + MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def); + assert(Def && "Unable to find a def for SUBREG_TO_REG source operand"); + Instrs.push_back(Def); + return true; + } + + bool Any = false; + if (VisitedBlocks.count(MBB)) + return false; + VisitedBlocks.insert(MBB); + for (MachineBasicBlock *Pred : MBB->predecessors()) { + Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred, + SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred))); + } + return Any; +} + bool RegisterCoalescer::joinCopy( MachineInstr *CopyMI, bool &Again, SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) { @@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy( }); } + SmallVector<MachineInstr *> SubregToRegSrcInsts; + if (CopyMI->isSubregToReg()) { + // For the case where the copy instruction is a SUBREG_TO_REG, e.g. + // + // %0:gpr32 = movimm32 .. + // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 + // ... + // %0:gpr32 = COPY <something> + // + // After joining liveranges, the original `movimm32` will need an + // implicit-def to make it explicit that the entire register is written, + // i.e. + // + // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0 + // ... + // undef %0.sub32:gpr64 = COPY <something> // Note that this does not + // // require an implicit-def, + // // because it has nothing to + // // do with the SUBREG_TO_REG. + LiveInterval &SrcInt = + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI); + SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks; + if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt, + CopyMI->getParent(), + SrcInt.Query(SubregToRegSlotIdx).valueIn())) + llvm_unreachable("SUBREG_TO_REG src requires a def"); + } + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy( // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) + if (CP.getDstIdx()) { + assert(SubregToRegSrcInsts.empty() && "can this happen?"); updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + } + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), + SubregToRegSrcInsts); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d3df434..a43020e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/ByteProvider.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SDPatternMatch.h" @@ -15262,23 +15263,31 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { } } - // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller - // than X, and the And doesn't change the lower iX bits, we can move the - // AssertZext in front of the And and drop the AssertSext. if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND && - N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext && isa<ConstantSDNode>(N0.getOperand(1))) { - SDValue BigA = N0.getOperand(0); - EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); const APInt &Mask = N0.getConstantOperandAPInt(1); - if (AssertVT.bitsLT(BigA_AssertVT) && - Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) { - SDLoc DL(N); - SDValue NewAssert = - DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1); - return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert, - N0.getOperand(1)); + + // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller + // than X, and the And doesn't change the lower iX bits, we can move the + // AssertZext in front of the And and drop the AssertSext. + if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) { + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + if (AssertVT.bitsLT(BigA_AssertVT) && + Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) { + SDLoc DL(N); + SDValue NewAssert = + DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1); + return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert, + N0.getOperand(1)); + } } + + // Remove AssertZext entirely if the mask guarantees the assertion cannot + // fail. + // TODO: Use KB countMinLeadingZeros to handle non-constant masks? + if (Mask.isIntN(AssertVT.getScalarSizeInBits())) + return N0; } return SDValue(); @@ -22778,8 +22787,10 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, // we can remove the store. - if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, - StoreSize.getFixedValue() * 8)) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + if (LifetimeEndBase.contains( + DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8, + StoreBase, StoreSize.getFixedValue() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); @@ -28971,13 +28982,100 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return SDValue(); } +static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, + const TargetLowering &TLI) { + // Match a pattern such as: + // (X | (X >> C0) | (X >> C1) | ...) & Mask + // This extracts contiguous parts of X and ORs them together before comparing. + // We can optimize this so that we directly check (X & SomeMask) instead, + // eliminating the shifts. + + EVT VT = Root.getValueType(); + + // TODO: Support vectors? + if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND) + return SDValue(); + + SDValue N0 = Root.getOperand(0); + SDValue N1 = Root.getOperand(1); + + if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1)) + return SDValue(); + + APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal(); + + SDValue Src; + const auto IsSrc = [&](SDValue V) { + if (!Src) { + Src = V; + return true; + } + + return Src == V; + }; + + SmallVector<SDValue> Worklist = {N0}; + APInt PartsMask(VT.getSizeInBits(), 0); + while (!Worklist.empty()) { + SDValue V = Worklist.pop_back_val(); + if (!V.hasOneUse() && (Src && Src != V)) + return SDValue(); + + if (V.getOpcode() == ISD::OR) { + Worklist.push_back(V.getOperand(0)); + Worklist.push_back(V.getOperand(1)); + continue; + } + + if (V.getOpcode() == ISD::SRL) { + SDValue ShiftSrc = V.getOperand(0); + SDValue ShiftAmt = V.getOperand(1); + + if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt)) + return SDValue(); + + auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal(); + if (ShiftAmtVal > RootMask.getBitWidth()) + return SDValue(); + + PartsMask |= (RootMask << ShiftAmtVal); + continue; + } + + if (IsSrc(V)) { + PartsMask |= RootMask; + continue; + } + + return SDValue(); + } + + if (!Src) + return SDValue(); + + SDLoc DL(Root); + return DAG.getNode(ISD::AND, DL, VT, + {Src, DAG.getConstant(PartsMask, DL, VT)}); +} + /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); - return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); + if (SDValue C = + TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL)) + return C; + + if (ISD::isIntEqualitySetCC(Cond) && N0.getOpcode() == ISD::AND && + isNullConstant(N1)) { + + if (SDValue Res = matchMergedBFX(N0, DAG, TLI)) + return DAG.getSetCC(DL, VT, Res, N1, Cond); + } + + return SDValue(); } /// Given an ISD::SDIV node expressing a divide by constant, return @@ -29415,7 +29513,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { MachineMemOperand *MMO; }; - auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics { + auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics { if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) { int64_t Offset = 0; if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) @@ -29428,13 +29526,15 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { LSN->getBasePtr(), Offset /*base offset*/, LocationSize::precise(Size), LSN->getMemOperand()}; } - if (const auto *LN = cast<LifetimeSDNode>(N)) + if (const auto *LN = cast<LifetimeSDNode>(N)) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), 0, - LocationSize::precise(LN->getSize()), + LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())), (MachineMemOperand *)nullptr}; + } // Default. return {false /*isvolatile*/, /*isAtomic*/ false, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 74172b2..ba0ab23 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::FP_TO_FP16: LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); - if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + if (Node->getFlags().hasApproximateFuncs() && !TLI.useSoftFloat()) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); if ((SVT == MVT::f64 || SVT == MVT::f80) && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 773ff48..02d1100 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -784,10 +784,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::TargetFrameIndex: ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); break; - case ISD::LIFETIME_START: - case ISD::LIFETIME_END: - ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); - break; case ISD::PSEUDO_PROBE: ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid()); ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex()); @@ -7847,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } - // Perform trivial constant folding. - if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) - return SV; + if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) { + switch (Opcode) { + case ISD::XOR: + case ISD::ADD: + case ISD::PTRADD: + case ISD::SUB: + case ISD::SIGN_EXTEND_INREG: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::MUL: + case ISD::AND: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::UMIN: + case ISD::OR: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::UMAX: + case ISD::SMAX: + case ISD::SMIN: + // fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison. + return N2.getOpcode() == ISD::POISON ? N2 : N1; + } + } // Canonicalize an UNDEF to the RHS, even over a constant. - if (N1.isUndef()) { + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) { if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { case ISD::PTRADD: case ISD::SUB: - // fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(undef, non_undef_arg2) -> undef. + return N1; case ISD::SIGN_EXTEND_INREG: case ISD::UDIV: case ISD::SDIV: @@ -7868,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SREM: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + // fold op(undef, non_undef_arg2) -> 0. + return getConstant(0, DL, VT); } } } // Fold a bunch of operators when the RHS is undef. - if (N2.isUndef()) { + if (N2.getOpcode() == ISD::UNDEF) { switch (Opcode) { case ISD::XOR: - if (N1.isUndef()) + if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); @@ -7887,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::ADD: case ISD::PTRADD: case ISD::SUB: + // fold op(arg1, undef) -> undef. + return N2; case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: - // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(arg1, undef) -> poison. + return getPOISON(VT); case ISD::MUL: case ISD::AND: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + case ISD::UMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0. + return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT); case ISD::OR: case ISD::SADDSAT: case ISD::UADDSAT: - // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) -> - // poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getAllOnesConstant(DL, VT); + case ISD::UMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1. + return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT); + case ISD::SMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL, + VT); + case ISD::SMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL, + VT); } } + // Perform trivial constant folding. + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) + return SV; + // Memoize this node if possible. SDNode *N; SDVTList VTs = getVTList(VT); @@ -9360,8 +9397,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, - SDValue Chain, int FrameIndex, - int64_t Size) { + SDValue Chain, int FrameIndex) { const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END; const auto VTs = getVTList(MVT::Other); SDValue Ops[2] = { @@ -9373,13 +9409,12 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); ID.AddInteger(FrameIndex); - ID.AddInteger(Size); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - LifetimeSDNode *N = newSDNode<LifetimeSDNode>(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTs, Size); + LifetimeSDNode *N = + newSDNode<LifetimeSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs); createOperands(N, Ops); CSEMap.InsertNode(N, IP); InsertNode(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1636465..306e068 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); + SDNodeFlags Flags; + if (auto *TruncInst = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*TruncInst); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, DAG.getTargetConstant( - 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); + 0, dl, TLI.getPointerTy(DAG.getDataLayout())), + Flags)); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -7594,8 +7598,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (TM.getOptLevel() == CodeGenOptLevel::None) return; - const int64_t ObjectSize = - cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); const AllocaInst *LifetimeObject = cast<AllocaInst>(I.getArgOperand(1)); // First check that the Alloca is static, otherwise it won't have a @@ -7605,7 +7607,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; const int FrameIndex = SI->second; - Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize); + Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex); DAG.setRoot(Res); return; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9474587..900da76 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -946,8 +946,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << " -> " << ASC->getDestAddressSpace() << ']'; - } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) { - OS << "<0 to " << LN->getSize() << ">"; } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) { OS << '<' << AA->getAlign().value() << '>'; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1764910..48d6b99 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9471,7 +9471,7 @@ SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG, ISD::SRL, DL, VT, DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg), DAG.getConstant(DeBruijn, DL, VT)), - DAG.getConstant(ShiftAmt, DL, VT)); + DAG.getShiftAmountConstant(ShiftAmt, VT, DL)); Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD)); SmallVector<uint8_t> Table(BitWidth, 0); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index d4a3455..3c91b0e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() { ISD::SDIVFIX, ISD::SDIVFIXSAT, ISD::UDIVFIX, ISD::UDIVFIXSAT, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, - ISD::IS_FPCLASS}, + ISD::IS_FPCLASS, ISD::FCBRT, + ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, + ISD::FEXP2, ISD::FEXP10, + ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FCEIL, ISD::FRINT, + ISD::FTRUNC, ISD::FROUNDEVEN, + ISD::FTAN, ISD::FACOS, + ISD::FASIN, ISD::FATAN, + ISD::FCOSH, ISD::FSINH, + ISD::FTANH, ISD::FATAN2}, VT, Expand); // Overflow operations default to expand @@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT.isVector()) - setOperationAction( - {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, - ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND, - ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, - ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2}, - VT, Expand); + setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, + ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, + ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR, + ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND}, + VT, Expand); // Constrained floating-point operations default to expand. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ @@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() { {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, Expand); - // These library functions default to expand. - setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, - ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, - ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, - ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH, - ISD::FATAN2}, - {MVT::f32, MVT::f64, MVT::f128}, Expand); - // Insert custom handling default for llvm.canonicalize.*. setOperationAction(ISD::FCANONICALIZE, {MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand); @@ -2062,7 +2062,7 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { // FreeBSD has "__stack_chk_guard" defined externally on libc.so if (M.getDirectAccessExternalData() && - !TM.getTargetTriple().isWindowsGNUEnvironment() && + !TM.getTargetTriple().isOSCygMing() && !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) && (!TM.getTargetTriple().isOSDarwin() || diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 408d07b..e9172f4 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1060,27 +1060,27 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( auto &Context = getContext(); if (Kind.isMergeableConst4() && MergeableConst4Section) - return Context.getELFSection(".rodata.cst4." + SectionSuffix, + return Context.getELFSection(".rodata.cst4." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 4); if (Kind.isMergeableConst8() && MergeableConst8Section) - return Context.getELFSection(".rodata.cst8." + SectionSuffix, + return Context.getELFSection(".rodata.cst8." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 8); if (Kind.isMergeableConst16() && MergeableConst16Section) - return Context.getELFSection(".rodata.cst16." + SectionSuffix, + return Context.getELFSection(".rodata.cst16." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 16); if (Kind.isMergeableConst32() && MergeableConst32Section) - return Context.getELFSection(".rodata.cst32." + SectionSuffix, + return Context.getELFSection(".rodata.cst32." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 32); if (Kind.isReadOnly()) - return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); + return Context.getELFSection(".rodata." + SectionSuffix + ".", + ELF::SHT_PROGBITS, ELF::SHF_ALLOC); assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return Context.getELFSection(".data.rel.ro." + SectionSuffix, + return Context.getELFSection(".data.rel.ro." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE); } @@ -1734,7 +1734,8 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( Name == getInstrProfSectionName(IPSK_covdata, Triple::COFF, /*AddSegmentInfo=*/false) || Name == getInstrProfSectionName(IPSK_covname, Triple::COFF, - /*AddSegmentInfo=*/false)) + /*AddSegmentInfo=*/false) || + Name == ".llvmbc" || Name == ".llvmcmd") Kind = SectionKind::getMetadata(); int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); diff --git a/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp b/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp index 6267207..fd54190 100644 --- a/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp +++ b/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp @@ -369,6 +369,19 @@ static GlobalVariable *getOrCreateRefVariable( AddrOfOldGV, Twine("__ref_").concat(GV->getName()), nullptr, GlobalVariable::NotThreadLocal); + // RefGV is created with isConstant = false, but we want to place RefGV into + // .rdata, not .data. It is important that the GlobalVariable be mutable + // from the compiler's point of view, so that the optimizer does not remove + // the global variable entirely and replace all references to it with its + // initial value. + // + // When the Windows hot-patch loader applies a hot-patch, it maps the + // pages of .rdata as read/write so that it can set each __ref_* variable + // to point to the original variable in the base image. Afterward, pages in + // .rdata are remapped as read-only. This protects the __ref_* variables from + // being overwritten during execution. + RefGV->setSection(".rdata"); + // Create debug info for the replacement global variable. DataLayout Layout = M->getDataLayout(); DIType *DebugType = DebugInfo.createPointerType( |