Diffstat (limited to 'llvm/lib/CodeGen')
44 files changed, 1008 insertions, 293 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 5d7c97a..6356d71 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -37,8 +37,8 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, // unsigned long personality; /* Pointer to the personality routine */ // } - auto *EHInfo = - cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection()); + auto *EHInfo = static_cast<MCSectionXCOFF *>( + Asm->getObjFileLowering().getCompactUnwindSection()); if (Asm->TM.getFunctionSections()) { // If option -ffunction-sections is on, append the function name to the // name of EH Info Table csect so that each function has its own EH Info diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index de6ebcf..51342c6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -39,7 +39,7 @@ void ARMException::beginFunction(const MachineFunction *MF) { if (CFISecType == AsmPrinter::CFISection::Debug) { if (!hasEmittedCFISections) { if (Asm->getModuleCFISectionType() == AsmPrinter::CFISection::Debug) - Asm->OutStreamer->emitCFISections(false, true); + Asm->OutStreamer->emitCFISections(false, true, false); hasEmittedCFISections = true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f1d3e96..1641c3e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1654,6 +1654,88 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) { *StackUsageStream << "static\n"; } +/// Extracts a generalized numeric type identifier of a Function's type from +/// type metadata. Returns null if metadata cannot be found. +static ConstantInt *extractNumericCGTypeId(const Function &F) { + SmallVector<MDNode *, 2> Types; + F.getMetadata(LLVMContext::MD_type, Types); + for (const auto &Type : Types) { + if (Type->hasGeneralizedMDString()) { + MDString *MDGeneralizedTypeId = cast<MDString>(Type->getOperand(1)); + uint64_t TypeIdVal = llvm::MD5Hash(MDGeneralizedTypeId->getString()); + IntegerType *Int64Ty = Type::getInt64Ty(F.getContext()); + return ConstantInt::get(Int64Ty, TypeIdVal); + } + } + return nullptr; +} + +/// Emits .callgraph section. +void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, + FunctionInfo &FuncInfo) { + if (!MF.getTarget().Options.EmitCallGraphSection) + return; + + // Switch to the call graph section for the function + MCSection *FuncCGSection = + getObjFileLowering().getCallGraphSection(*getCurrentSection()); + assert(FuncCGSection && "null callgraph section"); + OutStreamer->pushSection(); + OutStreamer->switchSection(FuncCGSection); + + // Emit format version number. + OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0); + + // Emit function's self information, which is composed of: + // 1) FunctionEntryPc + // 2) FunctionKind: Whether the function is indirect target, and if so, + // whether its type id is known. + // 3) FunctionTypeId: Emit only when the function is an indirect target + // and its type id is known. + + // Emit function entry pc. + const MCSymbol *FunctionSymbol = getFunctionBegin(); + OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + + // If this function has external linkage or has its address taken and + // it is not a callback, then anything could call it. 
+  const Function &F = MF.getFunction();
+  bool IsIndirectTarget =
+      !F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
+                                                /*IgnoreCallbackUses=*/true,
+                                                /*IgnoreAssumeLikeCalls=*/true,
+                                                /*IgnoreLLVMUsed=*/false);
+
+  // FIXME: FunctionKind takes a few values but emitted as a 64-bit value.
+  // Can be optimized to occupy 2 bits instead.
+  // Emit function kind, and type id if available.
+  if (!IsIndirectTarget) {
+    OutStreamer->emitInt64(
+        static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET));
+  } else {
+    if (const auto *TypeId = extractNumericCGTypeId(F)) {
+      OutStreamer->emitInt64(static_cast<uint64_t>(
+          FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID));
+      OutStreamer->emitInt64(TypeId->getZExtValue());
+    } else {
+      OutStreamer->emitInt64(static_cast<uint64_t>(
+          FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
+    }
+  }
+
+  // Emit callsite labels, where each element is a pair of type id and
+  // indirect callsite pc.
+  const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
+  OutStreamer->emitInt64(CallSiteLabels.size());
+  for (const auto &[TypeId, Label] : CallSiteLabels) {
+    OutStreamer->emitInt64(TypeId);
+    OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
+  }
+  FuncInfo.CallSiteLabels.clear();
+
+  OutStreamer->popSection();
+}
+
 void AsmPrinter::emitPCSectionsLabel(const MachineFunction &MF,
                                      const MDNode &MD) {
   MCSymbol *S = MF.getContext().createTempSymbol("pcsection");
@@ -1784,6 +1866,23 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
   return Name;
 }
 
+void AsmPrinter::emitIndirectCalleeLabels(
+    FunctionInfo &FuncInfo,
+    const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
+    const MachineInstr &MI) {
+  // Only indirect calls have type identifiers set.
+  const auto &CallSiteInfo = CallSitesInfoMap.find(&MI);
+  if (CallSiteInfo == CallSitesInfoMap.end())
+    return;
+
+  for (ConstantInt *CalleeTypeId : CallSiteInfo->second.CalleeTypeIds) {
+    MCSymbol *S = MF->getContext().createTempSymbol();
+    OutStreamer->emitLabel(S);
+    uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
+    FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
+  }
+}
+
 /// EmitFunctionBody - This method emits the body and trailer for a
 /// function.
 void AsmPrinter::emitFunctionBody() {
@@ -1830,6 +1929,8 @@ void AsmPrinter::emitFunctionBody() {
   MBBSectionRanges[MF->front().getSectionID()] =
       MBBSectionRange{CurrentFnBegin, nullptr};
 
+  FunctionInfo FuncInfo;
+  const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
@@ -1963,6 +2064,9 @@ void AsmPrinter::emitFunctionBody() {
       break;
     }
 
+    if (TM.Options.EmitCallGraphSection && MI.isCall())
+      emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
+
     // If there is a post-instruction symbol, emit a label for it here.
     if (MCSymbol *S = MI.getPostInstrSymbol())
       OutStreamer->emitLabel(S);
@@ -2142,6 +2246,9 @@ void AsmPrinter::emitFunctionBody() {
   // Emit section containing stack size metadata.
   emitStackSizeSection(*MF);
 
+  // Emit section containing call graph metadata.
+  emitCallGraphSection(*MF, FuncInfo);
+
   // Emit .su file containing function stack size information.
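// [Illustrative sketch] How a consumer might decode the .callgraph records
// emitted above, assuming the V_0 layout just described: a u64 format
// version, the pointer-sized function entry PC (8 bytes assumed here), a u64
// FunctionKind, an optional u64 type id, then a u64 count of
// (type id, call-site PC) pairs. The struct/function names, little-endian
// reads, and the concrete FunctionKind encoding are assumptions, not part of
// this patch.
#include <cstdint>
#include <cstring>
#include <vector>

struct CGCallSite { uint64_t TypeId, SitePC; };
struct CGRecord {
  uint64_t Version, EntryPC, Kind, TypeId = 0;
  std::vector<CGCallSite> Sites;
};

static uint64_t readU64(const uint8_t *&P) {
  uint64_t V;
  std::memcpy(&V, P, sizeof(V)); // assumes a little-endian host and target
  P += sizeof(V);
  return V;
}

static CGRecord parseCGRecord(const uint8_t *&P) {
  CGRecord R;
  R.Version = readU64(P); // CallGraphSectionFormatVersion::V_0
  R.EntryPC = readU64(P);
  R.Kind = readU64(P);    // assumed: 2 == INDIRECT_TARGET_KNOWN_TID
  if (R.Kind == 2)
    R.TypeId = readU64(P);
  for (uint64_t N = readU64(P); N != 0; --N)
    R.Sites.push_back({readU64(P), readU64(P)});
  return R;
}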
emitStackUsage(*MF); @@ -2841,6 +2948,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF, *this) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || + MF.getTarget().Options.EmitCallGraphSection || MF.getTarget().Options.BBAddrMap) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) @@ -4221,10 +4329,11 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { SectionKind Kind = CPE.getSectionKind(&DL); const Constant *C = CPE.Val.ConstVal; Align Alignment = CPE.Alignment; - if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>( - getObjFileLowering().getSectionForConstant(DL, Kind, C, - Alignment))) { - if (MCSymbol *Sym = S->getCOMDATSymbol()) { + auto *S = + getObjFileLowering().getSectionForConstant(DL, Kind, C, Alignment); + if (S && TM.getTargetTriple().isOSBinFormatCOFF()) { + if (MCSymbol *Sym = + static_cast<const MCSectionCOFF *>(S)->getCOMDATSymbol()) { if (Sym->isUndefined()) OutStreamer->emitSymbolAttribute(Sym, MCSA_Global); return Sym; diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 8abeb56..c5d6e40 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1051,10 +1051,10 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { // comdat key. A section may be comdat because of -ffunction-sections or // because it is comdat in the IR. MCSectionCOFF *GVSec = - GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr; + GVSym ? static_cast<MCSectionCOFF *>(&GVSym->getSection()) : nullptr; const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr; - MCSectionCOFF *DebugSec = cast<MCSectionCOFF>( + auto *DebugSec = static_cast<MCSectionCOFF *>( CompilerInfoAsm->getObjFileLowering().getCOFFDebugSymbolsSection()); DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 4fac4bb..6b8d08c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -109,9 +109,11 @@ void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) { // chose not to be verbose in that case. And with `ForceDwarfFrameSection`, // we should always emit .debug_frame. if (CFISecType == AsmPrinter::CFISection::Debug || - Asm->TM.Options.ForceDwarfFrameSection) + Asm->TM.Options.ForceDwarfFrameSection || + Asm->TM.Options.MCOptions.EmitSFrameUnwind) Asm->OutStreamer->emitCFISections( - CFISecType == AsmPrinter::CFISection::EH, true); + CFISecType == AsmPrinter::CFISection::EH, true, + Asm->TM.Options.MCOptions.EmitSFrameUnwind); hasEmittedCFISections = true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 11b8576..7188833 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -972,10 +972,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // the call graph which could lead to some target function. For tail // calls, no return PC information is needed, unless tuning for GDB in // DWARF4 mode in which case we fake a return PC for compatibility. - const MCSymbol *PCAddr = - (!IsTail || CU.useGNUAnalogForDwarf5Feature()) - ? 
const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI)) - : nullptr; + const MCSymbol *PCAddr = (!IsTail || CU.useGNUAnalogForDwarf5Feature()) + ? getLabelAfterInsn(TopLevelCallMI) + : nullptr; // For tail calls, it's necessary to record the address of the branch // instruction so that the debugger can show where the tail call occurred. diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 3b3e7a4..dcfd9aa 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -2083,22 +2083,55 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (TBB == FBB) { MBB->splice(Loc, TBB, TBB->begin(), TIB); } else { + // Merge the debug locations, and hoist and kill the debug instructions from + // both branches. FIXME: We could probably try harder to preserve some debug + // instructions (but at least this isn't producing wrong locations). + MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc); + auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) { + assert(DI->isDebugInstr() && "Expected a debug instruction"); + if (DI->isDebugRef()) { + const TargetInstrInfo *TII = + MBB->getParent()->getSubtarget().getInstrInfo(); + const MCInstrDesc &DBGV = TII->get(TargetOpcode::DBG_VALUE); + DI = BuildMI(*MBB->getParent(), DI->getDebugLoc(), DBGV, false, 0, + DI->getDebugVariable(), DI->getDebugExpression()); + MBB->insert(Loc, &*DI); + return; + } + // Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF. + if (DI->isDebugPHI()) { + DI->eraseFromParent(); + return; + } + // Move DBG_LABELs without modifying them. Set DBG_VALUEs undef. + if (!DI->isDebugLabel()) + DI->setDebugValueUndef(); + DI->moveBefore(&*Loc); + }; + // TIB and FIB point to the end of the regions to hoist/merge in TBB and // FBB. MachineBasicBlock::iterator FE = FIB; MachineBasicBlock::iterator FI = FBB->begin(); for (MachineBasicBlock::iterator TI : make_early_inc_range(make_range(TBB->begin(), TIB))) { - // Move debug instructions and pseudo probes without modifying them. - // FIXME: This is the wrong thing to do for debug locations, which - // should at least be killed (and hoisted from BOTH blocks). - if (TI->isDebugOrPseudoInstr()) { - TI->moveBefore(&*Loc); + // Hoist and kill debug instructions from FBB. After this loop FI points + // to the next non-debug instruction to hoist (checked in assert after the + // TBB debug instruction handling code). + while (FI != FE && FI->isDebugInstr()) + HoistAndKillDbgInstr(FI++); + + // Kill debug instructions before moving. + if (TI->isDebugInstr()) { + HoistAndKillDbgInstr(TI); continue; } - // Get the next non-meta instruction in FBB. - FI = skipDebugInstructionsForward(FI, FE, false); + // FI and TI now point to identical non-debug instructions. + assert(FI != FE && "Unexpected end of FBB range"); + // Pseudo probes are excluded from the range when identifying foldable + // instructions, so we don't expect to see one now. + assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range"); // NOTE: The loop above checks CheckKillDead but we can't do that here as // it modifies some kill markers after the check. 
         assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) &&
@@ -2111,6 +2144,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
       ++FI;
     }
   }
+
   FBB->erase(FBB->begin(), FIB);
 
   if (UpdateLiveIns)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c21058c..f16283b 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2095,6 +2095,10 @@ static bool isRemOfLoopIncrementWithLoopInvariant(
   if (!L->isLoopInvariant(RemAmt))
     return false;
 
+  // Only works if the AddOffset is a loop invariant.
+  if (AddOffset && !L->isLoopInvariant(AddOffset))
+    return false;
+
   // Is the PHI a loop increment?
   auto LoopIncrInfo = getIVIncrement(PN, LI);
   if (!LoopIncrInfo)
@@ -2765,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
       return optimizeGatherScatterInst(II, II->getArgOperand(0));
     case Intrinsic::masked_scatter:
       return optimizeGatherScatterInst(II, II->getArgOperand(1));
+    case Intrinsic::masked_load:
+      // Treat v1X masked load as load X type.
+      if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
+        if (VT->getNumElements() == 1) {
+          Value *PtrVal = II->getArgOperand(0);
+          unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+          if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+            return true;
+        }
+      }
+      return false;
+    case Intrinsic::masked_store:
+      // Treat v1X masked store as store X type.
+      if (auto *VT =
+              dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+        if (VT->getNumElements() == 1) {
+          Value *PtrVal = II->getArgOperand(1);
+          unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+          if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+            return true;
+        }
+      }
+      return false;
     }
 
     SmallVector<Value *, 2> PtrOps;
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 9512f79..810dc29 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -101,6 +101,7 @@ CGOPT(EABI, EABIVersion)
 CGOPT(DebuggerKind, DebuggerTuningOpt)
 CGOPT(bool, EnableStackSizeSection)
 CGOPT(bool, EnableAddrsig)
+CGOPT(bool, EnableCallGraphSection)
 CGOPT(bool, EmitCallSiteInfo)
 CGOPT(bool, EnableMachineFunctionSplitter)
 CGOPT(bool, EnableStaticDataPartitioning)
@@ -461,6 +462,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(EnableAddrsig);
 
+  static cl::opt<bool> EnableCallGraphSection(
+      "call-graph-section", cl::desc("Emit a call graph section"),
+      cl::init(false));
+  CGBINDOPT(EnableCallGraphSection);
+
   static cl::opt<bool> EmitCallSiteInfo(
       "emit-call-site-info",
       cl::desc(
@@ -595,6 +601,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
   Options.EnableStaticDataPartitioning = getEnableStaticDataPartitioning();
   Options.EmitAddrsig = getEnableAddrsig();
+  Options.EmitCallGraphSection = getEnableCallGraphSection();
   Options.EmitCallSiteInfo = getEmitCallSiteInfo();
   Options.EnableDebugEntryValues = getEnableDebugEntryValues();
   Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 8855740f..9b2851e 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -2186,19 +2186,16 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
     llvm_unreachable("Deinterleave node
should already have ReplacementNode"); break; case ComplexDeinterleavingOperation::Splat: { - auto *NewTy = VectorType::getDoubleElementsVectorType( - cast<VectorType>(Node->Real->getType())); auto *R = dyn_cast<Instruction>(Node->Real); auto *I = dyn_cast<Instruction>(Node->Imag); if (R && I) { // Splats that are not constant are interleaved where they are located Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode(); IRBuilder<> IRB(InsertPoint); - ReplacementNode = IRB.CreateIntrinsic(Intrinsic::vector_interleave2, - NewTy, {Node->Real, Node->Imag}); + ReplacementNode = IRB.CreateVectorInterleave({Node->Real, Node->Imag}); } else { - ReplacementNode = Builder.CreateIntrinsic( - Intrinsic::vector_interleave2, NewTy, {Node->Real, Node->Imag}); + ReplacementNode = + Builder.CreateVectorInterleave({Node->Real, Node->Imag}); } break; } @@ -2226,10 +2223,7 @@ Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, auto *MaskImag = cast<Instruction>(Node->Imag)->getOperand(0); auto *A = replaceNode(Builder, Node->Operands[0]); auto *B = replaceNode(Builder, Node->Operands[1]); - auto *NewMaskTy = VectorType::getDoubleElementsVectorType( - cast<VectorType>(MaskReal->getType())); - auto *NewMask = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, - NewMaskTy, {MaskReal, MaskImag}); + auto *NewMask = Builder.CreateVectorInterleave({MaskReal, MaskImag}); ReplacementNode = Builder.CreateSelect(NewMask, A, B); break; } @@ -2260,8 +2254,8 @@ void ComplexDeinterleavingGraph::processReductionSingle( } if (!NewInit) - NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy, - {Init, Constant::getNullValue(VTy)}); + NewInit = + Builder.CreateVectorInterleave({Init, Constant::getNullValue(VTy)}); NewPHI->addIncoming(NewInit, Incoming); NewPHI->addIncoming(OperationReplacement, BackEdge); @@ -2281,16 +2275,12 @@ void ComplexDeinterleavingGraph::processReductionOperation( auto *OldPHIImag = ReductionInfo[Imag].first; auto *NewPHI = OldToNewPHI[OldPHIReal]; - auto *VTy = cast<VectorType>(Real->getType()); - auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); - // We have to interleave initial origin values coming from IncomingBlock Value *InitReal = OldPHIReal->getIncomingValueForBlock(Incoming); Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming); IRBuilder<> Builder(Incoming->getTerminator()); - auto *NewInit = Builder.CreateIntrinsic(Intrinsic::vector_interleave2, NewVTy, - {InitReal, InitImag}); + auto *NewInit = Builder.CreateVectorInterleave({InitReal, InitImag}); NewPHI->addIncoming(NewInit, Incoming); NewPHI->addIncoming(OperationReplacement, BackEdge); diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 714ec55..1c1047c 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -103,10 +103,10 @@ static void expandFPToI(Instruction *FPToI) { Value *A1 = nullptr; if (FloatVal->getType()->isHalfTy()) { if (FPToI->getOpcode() == Instruction::FPToUI) { - Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32)); + Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty()); A1 = Builder.CreateZExt(A0, IntTy); } else { // FPToSI - Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32)); + Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty()); A1 = Builder.CreateSExt(A0, IntTy); } FPToI->replaceAllUsesWith(A1); @@ -425,8 +425,8 @@ static void expandIToFP(Instruction *IToFP) { AAddr0->addIncoming(IsSigned ? 
Sub : IntVal, IfThen4); AAddr0->addIncoming(Shl, SwBB); Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty()); - Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2)); - Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1)); + Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2)); + Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1)); Value *Conv16 = Builder.CreateZExt(A2, IntTy); Value *Or17 = Builder.CreateOr(AAddr0, Conv16); Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1)); @@ -457,9 +457,9 @@ static void expandIToFP(Instruction *IToFP) { Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32)); Value *ExtractT62 = nullptr; if (FloatWidth > 80) - ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64)); + ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty()); else - ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32)); + ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty()); Builder.CreateBr(IfEnd26); // if.else: @@ -475,7 +475,7 @@ static void expandIToFP(Instruction *IToFP) { Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32)); Value *ExtractT66 = nullptr; if (FloatWidth > 80) - ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64)); + ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty()); else ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty()); Builder.CreateBr(IfEnd26); @@ -507,30 +507,29 @@ static void expandIToFP(Instruction *IToFP) { Builder.getIntN(BitWidth, 63)); And29 = Builder.CreateAnd(Shr, Temp2, "and29"); } else { - Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32)); + Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty()); And29 = Builder.CreateAnd( - Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000)); + Conv28, ConstantInt::getSigned(Builder.getInt32Ty(), 0x80000000)); } unsigned TempMod = FPMantissaWidth % 32; Value *And34 = nullptr; Value *Shl30 = nullptr; if (FloatWidth > 80) { TempMod += 32; - Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod)); + Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod)); Shl30 = Builder.CreateAdd( - Add, - Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod)); - And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128)); + Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod)); + And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty()); } else { - Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod)); + Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod)); Shl30 = Builder.CreateAdd( - Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod)); + Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod)); And34 = Builder.CreateAnd(FloatWidth > 32 ? 
AAddr1Off32 : AAddr1Off0, - Builder.getIntN(32, (1 << TempMod) - 1)); + Builder.getInt32((1 << TempMod) - 1)); } Value *Or35 = nullptr; if (FloatWidth > 80) { - Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128)); + Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty()); Value *Or31 = Builder.CreateOr(And29Trunc, And34); Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64)); Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1), diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 012d873..9ba1782 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -1009,7 +1009,8 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, for (unsigned I = 0; I < NumValues; ++I) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + MIRBuilder.materializeObjectPtrOffset(Addr, DemoteReg, OffsetLLTy, + Offsets[I]); auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, MRI.getType(VRegs[I]), commonAlignment(BaseAlign, Offsets[I])); @@ -1039,7 +1040,8 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, for (unsigned I = 0; I < NumValues; ++I) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]); + MIRBuilder.materializeObjectPtrOffset(Addr, DemoteReg, OffsetLLTy, + Offsets[I]); auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, MRI.getType(VRegs[I]), commonAlignment(BaseAlign, Offsets[I])); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index e8f513a..e84ba91 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5949,8 +5949,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, const TargetOptions &Options = MF->getTarget().Options; LLT DstType = MRI.getType(MI.getOperand(0).getReg()); - if (CanReassociate && - !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc))) + if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc)) return false; // Floating-point multiply-add with intermediate rounding. @@ -5962,8 +5961,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, if (!HasFMAD && !HasFMA) return false; - AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD; + AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD; // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract)) return false; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index dc5dfab..fd38c30 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1409,7 +1409,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { Regs.size() == 1 ? 
LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializeObjectPtrOffset(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); Align BaseAlign = getMemOpAlign(LI); @@ -1448,7 +1448,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; - MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializeObjectPtrOffset(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); Align BaseAlign = getMemOpAlign(SI); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ed7b07f..d9d3569 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4170,7 +4170,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst); + auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst); auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO); @@ -4277,8 +4277,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { LLT PtrTy = MRI.getType(PtrReg); auto OffsetCst = MIRBuilder.buildConstant( LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst); + auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst); MachineMemOperand *LargeMMO = MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); @@ -5349,7 +5348,8 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, unsigned ByteOffset = Offset / 8; Register NewAddrReg; - MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset); + MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy, + ByteOffset); MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, ByteOffset, PartTy); @@ -8004,7 +8004,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) { if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly. 
return UnableToLegalize; - if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) { + if (MI.getFlag(MachineInstr::FmAfn)) { unsigned Flags = MI.getFlags(); auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags); MIRBuilder.buildFPTrunc(Dst, Src32, Flags); @@ -9822,7 +9822,7 @@ LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val, if (DstOff != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); - Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0); } MIB.buildStore(Value, Ptr, *StoreMMO); @@ -9962,7 +9962,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, LLT SrcTy = MRI.getType(Src); Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); @@ -9970,7 +9970,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, Register StorePtr = Dst; if (CurrOffset != 0) { LLT DstTy = MRI.getType(Dst); - StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); @@ -10060,7 +10060,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, LLT SrcTy = MRI.getType(Src); auto Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -10078,7 +10078,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, LLT DstTy = MRI.getType(Dst); auto Offset = MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 121d7e8..27df7e3 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -208,11 +208,20 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0, return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags); } +MachineInstrBuilder MachineIRBuilder::buildObjectPtrOffset(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { + return buildPtrAdd(Res, Op0, Op1, + MachineInstr::MIFlag::NoUWrap | + MachineInstr::MIFlag::InBounds); +} + std::optional<MachineInstrBuilder> MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, - const LLT ValueTy, uint64_t Value) { + const LLT ValueTy, uint64_t Value, + std::optional<unsigned> Flags) { assert(Res == 0 && "Res is a result argument"); - assert(ValueTy.isScalar() && "invalid offset type"); + assert(ValueTy.isScalar() && "invalid offset type"); if (Value == 0) { Res = Op0; @@ -221,7 +230,14 @@ MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); auto Cst = buildConstant(ValueTy, Value); - return buildPtrAdd(Res, Op0, 
Cst.getReg(0)); + return buildPtrAdd(Res, Op0, Cst.getReg(0), Flags); +} + +std::optional<MachineInstrBuilder> MachineIRBuilder::materializeObjectPtrOffset( + Register &Res, Register Op0, const LLT ValueTy, uint64_t Value) { + return materializePtrAdd(Res, Op0, ValueTy, Value, + MachineInstr::MIFlag::NoUWrap | + MachineInstr::MIFlag::InBounds); } MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 1b69188..5e50898 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -253,6 +253,21 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, return false; } +static Value *getMaskOperand(IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + llvm_unreachable("Unexpected intrinsic"); + case Intrinsic::vp_load: + return II->getOperand(1); + case Intrinsic::masked_load: + return II->getOperand(2); + case Intrinsic::vp_store: + return II->getOperand(2); + case Intrinsic::masked_store: + return II->getOperand(3); + } +} + // Return the corresponded deinterleaved mask, or nullptr if there is no valid // mask. static Value *getMask(Value *WideMask, unsigned Factor, @@ -268,8 +283,12 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( if (isa<ScalableVectorType>(Load->getType())) return false; - if (auto *LI = dyn_cast<LoadInst>(Load); - LI && !LI->isSimple()) + auto *LI = dyn_cast<LoadInst>(Load); + auto *II = dyn_cast<IntrinsicInst>(Load); + if (!LI && !II) + return false; + + if (LI && !LI->isSimple()) return false; // Check if all users of this load are shufflevectors. If we encounter any @@ -322,7 +341,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( // Holds the corresponding index for each DE-interleave shuffle. SmallVector<unsigned, 4> Indices; - Type *VecTy = FirstSVI->getType(); + VectorType *VecTy = cast<VectorType>(FirstSVI->getType()); // Check if other shufflevectors are also DE-interleaved of the same type // and factor as the first shufflevector. @@ -360,13 +379,16 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load); Value *Mask = nullptr; - if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) { - Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy)); + if (LI) { + LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n"); + } else { + // Check mask operand. Handle both all-true/false and interleaved mask. 
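// For reference, the operand positions returned by getMaskOperand() above
// follow the intrinsic signatures:
//   llvm.vp.load(ptr, mask, evl)                 -> mask is operand 1
//   llvm.masked.load(ptr, align, mask, passthru) -> mask is operand 2
//   llvm.vp.store(value, ptr, mask, evl)         -> mask is operand 2
//   llvm.masked.store(value, ptr, align, mask)   -> mask is operand 3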
+ Mask = getMask(getMaskOperand(II), Factor, VecTy); if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n"); - } else { - LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n"); + + LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: " + << *Load << "\n"); } // Try to create target specific intrinsics to replace the load and @@ -483,15 +505,16 @@ bool InterleavedAccessImpl::tryReplaceExtracts( bool InterleavedAccessImpl::lowerInterleavedStore( Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) { Value *StoredValue; - if (auto *SI = dyn_cast<StoreInst>(Store)) { + auto *SI = dyn_cast<StoreInst>(Store); + auto *II = dyn_cast<IntrinsicInst>(Store); + if (SI) { if (!SI->isSimple()) return false; StoredValue = SI->getValueOperand(); - } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) { - assert(VPStore->getIntrinsicID() == Intrinsic::vp_store); - StoredValue = VPStore->getArgOperand(0); } else { - llvm_unreachable("unsupported store operation"); + assert(II->getIntrinsicID() == Intrinsic::vp_store || + II->getIntrinsicID() == Intrinsic::masked_store); + StoredValue = II->getArgOperand(0); } auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue); @@ -508,18 +531,18 @@ bool InterleavedAccessImpl::lowerInterleavedStore( "number of stored element should be a multiple of Factor"); Value *Mask = nullptr; - if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) { + if (SI) { + LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n"); + } else { + // Check mask operand. Handle both all-true/false and interleaved mask. unsigned LaneMaskLen = NumStoredElements / Factor; - Mask = getMask(VPStore->getMaskParam(), Factor, + Mask = getMask(getMaskOperand(II), Factor, ElementCount::getFixed(LaneMaskLen)); if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store - << "\n"); - - } else { - LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: " + << *Store << "\n"); } // Try to create target specific intrinsics to replace the store and @@ -564,6 +587,27 @@ static Value *getMask(Value *WideMask, unsigned Factor, } } + if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) { + // Check that the shuffle mask is: a) an interleave, b) all of the same + // set of the elements, and c) contained by the first source. (c) could + // be relaxed if desired. + unsigned NumSrcElts = + cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements(); + SmallVector<unsigned> StartIndexes; + if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor, + NumSrcElts * 2, StartIndexes) && + llvm::all_of(StartIndexes, [](unsigned Start) { return Start == 0; }) && + llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) { + return Idx < (int)NumSrcElts; + })) { + auto *LeafMaskTy = + VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC); + IRBuilder<> Builder(SVI); + return Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0), + uint64_t(0)); + } + } + return nullptr; } @@ -590,21 +634,12 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( << " and factor = " << Factor << "\n"); } else { assert(II); - - // Check mask operand. Handle both all-true/false and interleaved mask. 
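// Worked example for the shufflevector case of getMask() above, with
// Factor = 2: a wide mask such as
//   shufflevector <4 x i1> %m, <4 x i1> poison, <8 x i32> <0,0,1,1,2,2,3,3>
// is an interleave of %m with itself (all start indexes are 0 and every
// index stays within the first source operand), i.e. each lane of %m is
// repeated Factor times. Every deinterleaved leaf therefore shares the same
// mask, which the code recovers by extracting the leading LeafValueEC lanes
// of operand 0.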
- Value *WideMask; - switch (II->getIntrinsicID()) { - default: + if (II->getIntrinsicID() != Intrinsic::masked_load && + II->getIntrinsicID() != Intrinsic::vp_load) return false; - case Intrinsic::vp_load: - WideMask = II->getOperand(1); - break; - case Intrinsic::masked_load: - WideMask = II->getOperand(2); - break; - } - Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); + // Check mask operand. Handle both all-true/false and interleaved mask. + Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI)); if (!Mask) return false; @@ -641,19 +676,11 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( Value *Mask = nullptr; if (II) { - // Check mask operand. Handle both all-true/false and interleaved mask. - Value *WideMask; - switch (II->getIntrinsicID()) { - default: + if (II->getIntrinsicID() != Intrinsic::masked_store && + II->getIntrinsicID() != Intrinsic::vp_store) return false; - case Intrinsic::vp_store: - WideMask = II->getOperand(2); - break; - case Intrinsic::masked_store: - WideMask = II->getOperand(3); - break; - } - Mask = getMask(WideMask, Factor, + // Check mask operand. Handle both all-true/false and interleaved mask. + Mask = getMask(getMaskOperand(II), Factor, cast<VectorType>(InterleaveValues[0]->getType())); if (!Mask) return false; @@ -687,11 +714,13 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) { using namespace PatternMatch; for (auto &I : instructions(F)) { if (match(&I, m_CombineOr(m_Load(m_Value()), - m_Intrinsic<Intrinsic::vp_load>()))) + m_Intrinsic<Intrinsic::vp_load>())) || + match(&I, m_Intrinsic<Intrinsic::masked_load>())) Changed |= lowerInterleavedLoad(&I, DeadInsts); if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()), - m_Intrinsic<Intrinsic::vp_store>()))) + m_Intrinsic<Intrinsic::vp_store>())) || + match(&I, m_Intrinsic<Intrinsic::masked_store>())) Changed |= lowerInterleavedStore(&I, DeadInsts); if (auto *II = dyn_cast<IntrinsicInst>(&I)) { diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 7153902..8b72c29 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -217,6 +217,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) .Case("samesign", MIToken::kw_samesign) + .Case("inbounds", MIToken::kw_inbounds) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) @@ -616,6 +617,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { .Case("!range", MIToken::md_range) .Case("!DIExpression", MIToken::md_diexpr) .Case("!DILocation", MIToken::md_dilocation) + .Case("!noalias.addrspace", MIToken::md_noalias_addrspace) .Default(MIToken::Error); } diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index d7cd067..0627f17 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -78,6 +78,7 @@ struct MIToken { kw_nneg, kw_disjoint, kw_samesign, + kw_inbounds, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, @@ -151,6 +152,7 @@ struct MIToken { md_tbaa, md_alias_scope, md_noalias, + md_noalias_addrspace, md_range, md_diexpr, md_dilocation, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 3a364d5..6a464d9 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ 
b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1477,7 +1477,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_nneg) || Token.is(MIToken::kw_disjoint) || Token.is(MIToken::kw_nusw) || - Token.is(MIToken::kw_samesign)) { + Token.is(MIToken::kw_samesign) || + Token.is(MIToken::kw_inbounds)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1518,6 +1519,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NoUSWrap; if (Token.is(MIToken::kw_samesign)) Flags |= MachineInstr::SameSign; + if (Token.is(MIToken::kw_inbounds)) + Flags |= MachineInstr::InBounds; lex(); } @@ -3482,6 +3485,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseMDNode(AAInfo.NoAlias)) return true; break; + case MIToken::md_noalias_addrspace: + lex(); + if (parseMDNode(AAInfo.NoAliasAddrSpace)) + return true; + break; case MIToken::md_range: lex(); if (parseMDNode(Range)) @@ -3490,7 +3498,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { // TODO: Report an error on duplicate metadata nodes. default: return error("expected 'align' or '!tbaa' or '!alias.scope' or " - "'!noalias' or '!range'"); + "'!noalias' or '!range' or '!noalias.addrspace'"); } } if (expectAndConsume(MIToken::rparen)) diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 1e9fcf3..3e99e57 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -504,13 +504,21 @@ bool MIRParserImpl::initializeCallSiteInfo( return error(Error, ArgRegPair.Reg.SourceRange); CSInfo.ArgRegPairs.emplace_back(Reg, ArgRegPair.ArgNo); } + if (!YamlCSInfo.CalleeTypeIds.empty()) { + for (auto CalleeTypeId : YamlCSInfo.CalleeTypeIds) { + IntegerType *Int64Ty = Type::getInt64Ty(Context); + CSInfo.CalleeTypeIds.push_back(ConstantInt::get(Int64Ty, CalleeTypeId, + /*isSigned=*/false)); + } + } - if (TM.Options.EmitCallSiteInfo) + if (TM.Options.EmitCallSiteInfo || TM.Options.EmitCallGraphSection) MF.addCallSiteInfo(&*CallI, std::move(CSInfo)); } - if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo) - return error(Twine("Call site info provided but not used")); + if (!YamlMF.CallSitesInfo.empty() && + !(TM.Options.EmitCallSiteInfo || TM.Options.EmitCallGraphSection)) + return error("call site info provided but not used"); return false; } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 7710b50..ce1834a 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -525,24 +525,30 @@ static void convertCallSiteObjects(yaml::MachineFunction &YMF, const MachineFunction &MF, ModuleSlotTracker &MST) { const auto *TRI = MF.getSubtarget().getRegisterInfo(); - for (auto CSInfo : MF.getCallSitesInfo()) { + for (auto [MI, CallSiteInfo] : MF.getCallSitesInfo()) { yaml::CallSiteInfo YmlCS; yaml::MachineInstrLoc CallLocation; // Prepare instruction position. - MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator(); + MachineBasicBlock::const_instr_iterator CallI = MI->getIterator(); CallLocation.BlockNum = CallI->getParent()->getNumber(); // Get call instruction offset from the beginning of block. CallLocation.Offset = std::distance(CallI->getParent()->instr_begin(), CallI); YmlCS.CallLocation = CallLocation; + + auto [ArgRegPairs, CalleeTypeIds] = CallSiteInfo; // Construct call arguments and theirs forwarding register info. 
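// [Illustration] Roughly what convertCallSiteObjects() above serializes per
// call site in MIR YAML. The field values and the exact 'calleeTypeIds' key
// spelling are assumptions inferred from the code, not copied from a test:
//
//   callSites:
//     - { bb: 0, offset: 4, fwdArgRegs: [],
//         calleeTypeIds: [ 123456789 ] }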
- for (auto ArgReg : CSInfo.second.ArgRegPairs) { + for (auto ArgReg : ArgRegPairs) { yaml::CallSiteInfo::ArgRegPair YmlArgReg; YmlArgReg.ArgNo = ArgReg.ArgNo; printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI); YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg); } + // Get type ids. + for (auto *CalleeTypeId : CalleeTypeIds) { + YmlCS.CalleeTypeIds.push_back(CalleeTypeId->getZExtValue()); + } YMF.CallSitesInfo.push_back(std::move(YmlCS)); } @@ -814,6 +820,11 @@ static void printMI(raw_ostream &OS, MFPrintState &State, OS << "nusw "; if (MI.getFlag(MachineInstr::SameSign)) OS << "samesign "; + if (MI.getFlag(MachineInstr::InBounds)) + OS << "inbounds "; + + // NOTE: Please add new MIFlags also to the MI_FLAGS_STR in + // llvm/utils/update_mir_test_checks.py. OS << TII->getName(MI.getOpcode()); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 429a17a..ec40f6a 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -211,8 +211,7 @@ void MachineFunction::init() { ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); - // FIXME: Use Function::hasOptSize(). - if (!F.getAlign() && !F.hasFnAttribute(Attribute::OptimizeForSize)) + if (!F.getAlign() && !F.hasOptSize()) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -699,6 +698,26 @@ bool MachineFunction::needsFrameMoves() const { !F.getParent()->debug_compile_units().empty(); } +MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) { + // Numeric callee_type ids are only for indirect calls. + if (!CB.isIndirectCall()) + return; + + MDNode *CalleeTypeList = CB.getMetadata(LLVMContext::MD_callee_type); + if (!CalleeTypeList) + return; + + for (const MDOperand &Op : CalleeTypeList->operands()) { + MDNode *TypeMD = cast<MDNode>(Op); + MDString *TypeIdStr = cast<MDString>(TypeMD->getOperand(1)); + // Compute numeric type id from generalized type id string + uint64_t TypeIdVal = MD5Hash(TypeIdStr->getString()); + IntegerType *Int64Ty = Type::getInt64Ty(CB.getContext()); + CalleeTypeIds.push_back( + ConstantInt::get(Int64Ty, TypeIdVal, /*IsSigned=*/false)); + } +} + namespace llvm { template<> @@ -920,7 +939,7 @@ MachineFunction::getCallSiteInfo(const MachineInstr *MI) { assert(MI->isCandidateForAdditionalCallInfo() && "Call site info refers only to call (MI) candidates"); - if (!Target.Options.EmitCallSiteInfo) + if (!Target.Options.EmitCallSiteInfo && !Target.Options.EmitCallGraphSection) return CallSitesInfo.end(); return CallSitesInfo.find(MI); } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index da3665b..79047f7 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -585,6 +585,8 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::NoUSWrap; if (GEP->hasNoUnsignedWrap()) MIFlags |= MachineInstr::MIFlag::NoUWrap; + if (GEP->isInBounds()) + MIFlags |= MachineInstr::MIFlag::InBounds; } // Copy the nonneg flag. @@ -1860,8 +1862,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (getFlag(MachineInstr::NoUSWrap)) + OS << "nusw "; if (getFlag(MachineInstr::SameSign)) OS << "samesign "; + if (getFlag(MachineInstr::InBounds)) + OS << "inbounds "; // Print the opcode name. 
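// [Illustrative sketch] The numeric callee_type ids built above are MD5
// hashes of the "generalized" type-id string attached by the frontend. A
// standalone helper showing the same computation (the sample string is
// hypothetical; llvm::MD5Hash is the same Support helper the code uses):
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <cstdint>

static uint64_t numericTypeId(llvm::StringRef GeneralizedTypeName) {
  // e.g. "_ZTSFvvE.generalized" for a void() callee type.
  return llvm::MD5Hash(GeneralizedTypeName);
}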
if (TII) diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 0d25169..c612f8de 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1273,6 +1273,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << ", !noalias "; AAInfo.NoAlias->printAsOperand(OS, MST); } + if (AAInfo.NoAliasAddrSpace) { + OS << ", !noalias.addrspace "; + AAInfo.NoAliasAddrSpace->printAsOperand(OS, MST); + } if (getRanges()) { OS << ", !range "; getRanges()->printAsOperand(OS, MST); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 9d5c39c..c6fa8f4 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { TopCand.SU = nullptr; BotCand.SU = nullptr; - TopCluster = nullptr; - BotCluster = nullptr; + TopClusterID = InvalidClusterId; + BotClusterID = InvalidClusterId; } /// Initialize the per-region scheduling policy. @@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand, // This is a best effort to set things up for a post-RA pass. Optimizations // like generating loads of multiple registers should ideally be done within // the scheduler pass by combining the loads during DAG postprocessing. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? 
TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -4251,24 +4255,30 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - TopCluster = DAG->getCluster(SU->ParentClusterIdx); - LLVM_DEBUG(if (TopCluster) { - dbgs() << " Top Cluster: "; - for (auto *N : *TopCluster) - dbgs() << N->NodeNum << '\t'; - dbgs() << '\n'; + TopClusterID = SU->ParentClusterIdx; + LLVM_DEBUG({ + if (TopClusterID != InvalidClusterId) { + ClusterInfo *TopCluster = DAG->getCluster(TopClusterID); + dbgs() << " Top Cluster: "; + for (auto *N : *TopCluster) + dbgs() << N->NodeNum << '\t'; + dbgs() << '\n'; + } }); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysReg(SU, true); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); - BotCluster = DAG->getCluster(SU->ParentClusterIdx); - LLVM_DEBUG(if (BotCluster) { - dbgs() << " Bot Cluster: "; - for (auto *N : *BotCluster) - dbgs() << N->NodeNum << '\t'; - dbgs() << '\n'; + BotClusterID = SU->ParentClusterIdx; + LLVM_DEBUG({ + if (BotClusterID != InvalidClusterId) { + ClusterInfo *BotCluster = DAG->getCluster(BotClusterID); + dbgs() << " Bot Cluster: "; + for (auto *N : *BotCluster) + dbgs() << N->NodeNum << '\t'; + dbgs() << '\n'; + } }); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) @@ -4306,8 +4316,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { if (!Bot.HazardRec) { Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); } - TopCluster = nullptr; - BotCluster = nullptr; + TopClusterID = InvalidClusterId; + BotClusterID = InvalidClusterId; } void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, @@ -4373,10 +4383,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, return TryCand.Reason != NoCand; // Keep clustered nodes together. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. 
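// [Illustrative sketch] The cluster checks above presumably reduce to a
// plain id comparison along these lines; the real helper and the
// InvalidClusterId sentinel live in the scheduler headers, so both are
// assumptions here:
constexpr unsigned InvalidClusterId = ~0u; // assumed sentinel value

static bool isTheSameCluster(unsigned ZoneClusterID, unsigned SUClusterID) {
  return ZoneClusterID != InvalidClusterId && ZoneClusterID == SUClusterID;
}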
@@ -4575,11 +4589,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - TopCluster = DAG->getCluster(SU->ParentClusterIdx); + TopClusterID = SU->ParentClusterIdx; Top.bumpNode(SU); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); - BotCluster = DAG->getCluster(SU->ParentClusterIdx); + BotClusterID = SU->ParentClusterIdx; Bot.bumpNode(SU); } } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 0f742c4..21bf052 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -423,7 +423,7 @@ void ModuloScheduleExpander::generateExistingPhis( // potentially define two values. unsigned MaxPhis = PrologStage + 2; if (!InKernel && (int)PrologStage <= LoopValStage) - MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); + MaxPhis = std::max((int)MaxPhis - LoopValStage, 1); unsigned NumPhis = std::min(NumStages, MaxPhis); Register NewReg; diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 69b9291..2400a1f 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -178,10 +178,8 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg, for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid(); ++Aliases) { for (MachineOperand &MO : MRI->reg_operands(*Aliases)) { - if (MO.readsReg()) { + if (MO.readsReg()) MO.setIsUndef(true); - LIS->removeAllRegUnitsForPhysReg(MO.getReg()); - } } } } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 2d7987a..7ede564 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -306,7 +306,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p SubregToRegSrcInst is not empty, we are coalescing a + /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an + /// implicit-def of DstReg on instructions that define SrcReg. + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, + ArrayRef<MachineInstr *> SubregToRegSrcInst = {}); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1443,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. 
+ LaneBitmask NewMIImplicitOpsMask; SmallVector<MachineOperand, 4> ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1457,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); + if (MO.isDef() && MO.getReg().isVirtual() && + MRI->shouldTrackSubRegLiveness(DstReg)) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1499,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { assert(MO.getReg() == NewMI.getOperand(0).getReg()); - // We're only expecting another def of the main output, so the range - // should get updated with the regular output range. - // - // FIXME: The range updating below probably needs updating to look at - // the super register if subranges are tracked. - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "subrange update for implicit-def of super register may not be " - "properly handled"); + // If lanemasks need to be tracked, compile the lanemask of the NewMI + // implicit def operands to avoid subranges for the super-regs from + // being removed by code later on in this function. + if (MRI->shouldTrackSubRegLiveness(MO.getReg())) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } } @@ -1606,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { - if ((SR.LaneMask & DstMask).none()) { + if ((SR.LaneMask & DstMask).none() && + (SR.LaneMask & NewMIImplicitOpsMask).none()) { LLVM_DEBUG(dbgs() << "Removing undefined SubRange " << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); @@ -1870,11 +1877,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx) { +void RegisterCoalescer::updateRegDefsUses( + Register SrcReg, Register DstReg, unsigned SubIdx, + ArrayRef<MachineInstr *> SubregToRegSrcInsts) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); + // Coalescing a COPY may expose reads of 'undef' subregisters. + // If so, then explicitly propagate 'undef' to those operands. if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { for (MachineOperand &MO : MRI->reg_operands(DstReg)) { if (MO.isUndef()) @@ -1891,6 +1901,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, } } + // If DstInt already has a subrange for the unused lanes, then we shouldn't + // create duplicate subranges when we update the interval for unused lanes. + LaneBitmask DstIntLaneMask; + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + for (LiveInterval::SubRange &SR : DstInt->subranges()) + DstIntLaneMask |= SR.LaneMask; + } + + // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'. 
SmallPtrSet<MachineInstr *, 8> Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1914,6 +1933,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); + bool RequiresImplicitRedef = false; + if (!SubregToRegSrcInsts.empty()) { + // We can only add an implicit-def and undef if the sub registers match, + // e.g. + // %0:gr32 = INSTX + // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined + // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32 + // + // This cannot be transformed into: + // %1.sub32:gr64 = INSTX + // undef %1.sub8:gr64 = INSTY, implicit-def %1 + // + // Because that would thrash the top 24 bits of %1.sub32. + if (is_contained(SubregToRegSrcInsts, UseMI) && + all_of(UseMI->defs(), + [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool { + if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef()) + return true; + return SubIdx == MO.getSubReg(); + })) { + // Add implicit-def of super-register to express that the whole + // register is defined by the instruction. + MachineInstrBuilder MIB(*MF, UseMI); + MIB.addReg(DstReg, RegState::ImplicitDefine); + RequiresImplicitRedef = true; + } + + // If the coalesced instruction doesn't fully define the register, we need + // to preserve the original super register liveness for SUBREG_TO_REG. + // + // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, + // but it introduces liveness for other subregisters. Downstream users may + // have been relying on those bits, so we need to ensure their liveness is + // captured with a def of other lanes. + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + // First check if there is sufficient granularity in terms of subranges. + LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = DstMask & ~UsedLanes; + if ((UnusedLanes & ~DstIntLaneMask).any()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); + DstIntLaneMask |= UnusedLanes; + } + + // After duplicating the live ranges for the low/hi bits, we + // need to update the subranges of the DstReg interval such that + // for a case like this: + // + // entry: + // 16B %1:gpr32 = INSTRUCTION (<=> UseMI) + // : + // if.then: + // 32B %1:gpr32 = MOVIMM32 .. + // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32 + // + // Only the MOVIMM32 requires a def of the top lanes and any intervals + // for the top 32-bits of the def at 16B should be removed. + for (LiveInterval::SubRange &SR : DstInt->subranges()) { + if (!Writes || RequiresImplicitRedef || + (SR.LaneMask & UnusedLanes).none()) + continue; + + assert((SR.LaneMask & UnusedLanes) == SR.LaneMask && + "Unexpected lanemask. Subrange needs finer granularity"); + + SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false); + auto SegmentI = SR.find(UseIdx); + if (SegmentI != SR.end()) + SR.removeSegment(SegmentI, true); + } + } + } + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned Op : Ops) { MachineOperand &MO = UseMI->getOperand(Op); @@ -1922,7 +2015,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // turn a full def into a read-modify-write sub-register def and vice // versa.
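The granularity check above is plain set algebra on bitmasks: the lanes written through SubIdx are subtracted from the register's full mask, and a fresh subrange is created only for unused lanes that no existing subrange covers. A self-contained sketch with made-up lane masks, assuming a sub32 index covering the low lanes of a 64-bit register:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical lane masks: the full register spans four lanes and
  // sub32 covers the low two.
  uint64_t DstMask = 0b1111;        // getMaxLaneMaskForVReg(DstReg)
  uint64_t UsedLanes = 0b0011;      // getSubRegIndexLaneMask(sub32)
  uint64_t DstIntLaneMask = 0b0011; // lanes already owning a subrange

  uint64_t UnusedLanes = DstMask & ~UsedLanes; // 0b1100: the top lanes
  // A new subrange is needed only for unused lanes not yet covered.
  bool NeedNewSubRange = (UnusedLanes & ~DstIntLaneMask) != 0;
  assert(NeedNewSubRange);
  DstIntLaneMask |= UnusedLanes; // all four lanes are now covered
  assert(DstIntLaneMask == DstMask);
  return 0;
}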
if (SubIdx && MO.isDef()) - MO.setIsUndef(!Reads); + MO.setIsUndef(!Reads || RequiresImplicitRedef); // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. @@ -2025,6 +2118,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, LIS->shrinkToUses(&LI); } +/// For a given use of value \p Idx, it returns the def in the current block, +/// or otherwise all possible defs in preceding blocks. +static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks, + SmallVector<MachineInstr *> &Instrs, + LiveIntervals *LIS, LiveInterval &SrcInt, + MachineBasicBlock *MBB, VNInfo *Idx) { + if (!Idx->isPHIDef()) { + MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def); + assert(Def && "Unable to find a def for SUBREG_TO_REG source operand"); + Instrs.push_back(Def); + return true; + } + + bool Any = false; + if (VisitedBlocks.count(MBB)) + return false; + VisitedBlocks.insert(MBB); + for (MachineBasicBlock *Pred : MBB->predecessors()) { + Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred, + SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred))); + } + return Any; +} + bool RegisterCoalescer::joinCopy( MachineInstr *CopyMI, bool &Again, SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) { @@ -2156,6 +2273,35 @@ bool RegisterCoalescer::joinCopy( }); } + SmallVector<MachineInstr *> SubregToRegSrcInsts; + if (CopyMI->isSubregToReg()) { + // For the case where the copy instruction is a SUBREG_TO_REG, e.g. + // + // %0:gpr32 = movimm32 .. + // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 + // ... + // %0:gpr32 = COPY <something> + // + // After joining liveranges, the original `movimm32` will need an + // implicit-def to make it explicit that the entire register is written, + // i.e. + // + // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0 + // ... + // undef %0.sub32:gpr64 = COPY <something> // Note that this does not + // // require an implicit-def, + // // because it has nothing to + // // do with the SUBREG_TO_REG. + LiveInterval &SrcInt = + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI); + SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks; + if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt, + CopyMI->getParent(), + SrcInt.Query(SubregToRegSlotIdx).valueIn())) + llvm_unreachable("SUBREG_TO_REG src requires a def"); + } + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2225,9 +2371,12 @@ bool RegisterCoalescer::joinCopy( // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) + if (CP.getDstIdx()) { + assert(SubregToRegSrcInsts.empty() && "can this happen?"); updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + } + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), + SubregToRegSrcInsts); // Shrink subregister ranges if necessary. 
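FindDefInBlock above is a conventional reaching-definitions walk: a value defined by a real instruction resolves directly to that instruction, while a PHI-defined value fans out through every predecessor, with a visited set guarding against cycles in the CFG. A standalone model of that shape, with hypothetical Block/Value types standing in for MachineBasicBlock and VNInfo:

#include <unordered_set>
#include <vector>

struct Value {
  bool IsPhiDef;
  int DefInstr; // id of the defining instruction; valid when !IsPhiDef
};

struct Block {
  std::vector<Block *> Preds;
  Value *LiveOut; // value live-out of this block, as in
                  // getVNInfoBefore(getMBBEndIdx(Pred))
};

// Collect the defining instruction on every path reaching BB with value V.
bool findDefs(std::unordered_set<Block *> &Visited, std::vector<int> &Defs,
              Block *BB, Value *V) {
  if (!V->IsPhiDef) {
    Defs.push_back(V->DefInstr); // direct def: this path is resolved
    return true;
  }
  if (!Visited.insert(BB).second)
    return false; // block already explored; stop cyclic walks
  bool Any = false;
  for (Block *Pred : BB->Preds)
    Any |= findDefs(Visited, Defs, Pred, Pred->LiveOut);
  return Any;
}

int main() {
  Value DefA{false, 1};
  Value Phi{true, 0};
  Block Entry{{}, &DefA};
  Block Mid{{&Entry}, &Phi};
  std::unordered_set<Block *> Visited;
  std::vector<int> Defs;
  bool Found = findDefs(Visited, Defs, &Mid, Mid.LiveOut);
  return (Found && Defs.size() == 1 && Defs[0] == 1) ? 0 : 1;
}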
if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d3df434..5989c1d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/ByteProvider.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SDPatternMatch.h" @@ -330,6 +331,11 @@ namespace { return CombineTo(N, To, 2, AddTo); } + SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To, + bool AddTo = true) { + return CombineTo(N, To->data(), To->size(), AddTo); + } + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: @@ -540,6 +546,7 @@ namespace { SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_INTERLEAVE(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); @@ -2020,6 +2027,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); @@ -4099,18 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y)) if (N1.hasOneUse() && hasUMin(VT)) { SDValue Y; - if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y))) || - sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero())) || - sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y))) || - sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero()))) + auto MS0 = m_Specific(N0); + auto MVY = m_Value(Y); + auto MZ = m_Zero(); + auto MCC1 = m_SpecificCondCode(ISD::SETULT); + auto MCC2 = m_SpecificCondCode(ISD::SETUGE); + + if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) || + sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) || + sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) || + sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ))) + return DAG.getNode(ISD::UMIN, DL, VT, N0, DAG.getNode(ISD::SUB, DL, VT, N0, Y)); } @@ -10615,6 +10622,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getVScale(DL, VT, C0 << C1); } + SDValue X; + APInt VS0; + + // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1)) + if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) { + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() && + N0->getFlags().hasNoUnsignedWrap()); + + SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue()); + return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags); + } + // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). 
APInt ShlVal; if (N0.getOpcode() == ISD::STEP_VECTOR && @@ -15262,23 +15282,31 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { } } - // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller - // than X, and the And doesn't change the lower iX bits, we can move the - // AssertZext in front of the And and drop the AssertSext. if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND && - N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext && isa<ConstantSDNode>(N0.getOperand(1))) { - SDValue BigA = N0.getOperand(0); - EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); const APInt &Mask = N0.getConstantOperandAPInt(1); - if (AssertVT.bitsLT(BigA_AssertVT) && - Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) { - SDLoc DL(N); - SDValue NewAssert = - DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1); - return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert, - N0.getOperand(1)); + + // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller + // than X, and the And doesn't change the lower iX bits, we can move the + // AssertZext in front of the And and drop the AssertSext. + if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) { + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + if (AssertVT.bitsLT(BigA_AssertVT) && + Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) { + SDLoc DL(N); + SDValue NewAssert = + DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1); + return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert, + N0.getOperand(1)); + } } + + // Remove AssertZext entirely if the mask guarantees the assertion cannot + // fail. + // TODO: Use KB countMinLeadingZeros to handle non-constant masks? + if (Mask.isIntN(AssertVT.getScalarSizeInBits())) + return N0; } return SDValue(); @@ -22778,8 +22806,10 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, // we can remove the store. - if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, - StoreSize.getFixedValue() * 8)) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + if (LifetimeEndBase.contains( + DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8, + StoreBase, StoreSize.getFixedValue() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); @@ -25271,6 +25301,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands( return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask); } +static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalTypes, + bool LegalOperations) { + EVT VT = N->getValueType(0); + + // Post-legalization we can only create wider SPLAT_VECTOR operations if both + // the type and operation are legal. The Hexagon target has custom + // legalization for SPLAT_VECTOR that splits the operation into two parts and + // concatenates them. Therefore, custom lowering must also be rejected in + // order to avoid an infinite loop.
+ if ((LegalTypes && !TLI.isTypeLegal(VT)) || + (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR) + return SDValue(); + + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -25394,6 +25446,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(VT, SDLoc(N), Opnds); } + if (SDValue V = + combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations)) + return V; + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) @@ -25462,6 +25518,21 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) { + // Check to see if all operands are identical. + if (!llvm::all_equal(N->op_values())) + return SDValue(); + + // Check to see if the identical operand is a splat. + if (!DAG.isSplatValue(N->getOperand(0))) + return SDValue(); + + // interleave splat(X), splat(X).... --> splat(X), splat(X).... + SmallVector<SDValue, 4> Ops; + Ops.append(N->op_values().begin(), N->op_values().end()); + return CombineTo(N, &Ops); +} + // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find // if the subvector can be sourced for free. static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) { @@ -28971,13 +29042,100 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return SDValue(); } +static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, + const TargetLowering &TLI) { + // Match a pattern such as: + // (X | (X >> C0) | (X >> C1) | ...) & Mask + // This extracts contiguous parts of X and ORs them together before comparing. + // We can optimize this so that we directly check (X & SomeMask) instead, + // eliminating the shifts. + + EVT VT = Root.getValueType(); + + // TODO: Support vectors? 
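The fold described in the comment above rests on the identity that ((X | (X >> C0) | (X >> C1)) & Mask) == 0 holds exactly when (X & PartsMask) == 0, where PartsMask ORs together Mask shifted left by each shift amount; the code that follows accumulates PartsMask while walking the OR tree. A standalone brute-force check of the equivalence, with arbitrarily chosen constants:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0x0F; // arbitrary mask applied after the ORs
  const unsigned C0 = 8, C1 = 16;
  const uint32_t PartsMask = Mask | (Mask << C0) | (Mask << C1);

  for (uint32_t X = 0; X < (1u << 20); ++X) {
    bool Original = ((X | (X >> C0) | (X >> C1)) & Mask) == 0;
    bool Rewritten = (X & PartsMask) == 0;
    assert(Original == Rewritten && "merged-BFX rewrite must be equivalent");
  }
  return 0;
}

The production code additionally refuses shift amounts that would push the mask past the bit width; the bounded constants here sidestep that case.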
+ if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND) + return SDValue(); + + SDValue N0 = Root.getOperand(0); + SDValue N1 = Root.getOperand(1); + + if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1)) + return SDValue(); + + APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal(); + + SDValue Src; + const auto IsSrc = [&](SDValue V) { + if (!Src) { + Src = V; + return true; + } + + return Src == V; + }; + + SmallVector<SDValue> Worklist = {N0}; + APInt PartsMask(VT.getSizeInBits(), 0); + while (!Worklist.empty()) { + SDValue V = Worklist.pop_back_val(); + if (!V.hasOneUse() && (Src && Src != V)) + return SDValue(); + + if (V.getOpcode() == ISD::OR) { + Worklist.push_back(V.getOperand(0)); + Worklist.push_back(V.getOperand(1)); + continue; + } + + if (V.getOpcode() == ISD::SRL) { + SDValue ShiftSrc = V.getOperand(0); + SDValue ShiftAmt = V.getOperand(1); + + if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt)) + return SDValue(); + + auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal(); + if (ShiftAmtVal > RootMask.getBitWidth()) + return SDValue(); + + PartsMask |= (RootMask << ShiftAmtVal); + continue; + } + + if (IsSrc(V)) { + PartsMask |= RootMask; + continue; + } + + return SDValue(); + } + + if (!Src) + return SDValue(); + + SDLoc DL(Root); + return DAG.getNode(ISD::AND, DL, VT, + {Src, DAG.getConstant(PartsMask, DL, VT)}); +} + /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); - return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); + if (SDValue C = + TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL)) + return C; + + if (ISD::isIntEqualitySetCC(Cond) && N0.getOpcode() == ISD::AND && + isNullConstant(N1)) { + + if (SDValue Res = matchMergedBFX(N0, DAG, TLI)) + return DAG.getSetCC(DL, VT, Res, N1, Cond); + } + + return SDValue(); } /// Given an ISD::SDIV node expressing a divide by constant, return @@ -29415,7 +29573,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { MachineMemOperand *MMO; }; - auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics { + auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics { if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) { int64_t Offset = 0; if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) @@ -29428,13 +29586,15 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { LSN->getBasePtr(), Offset /*base offset*/, LocationSize::precise(Size), LSN->getMemOperand()}; } - if (const auto *LN = cast<LifetimeSDNode>(N)) + if (const auto *LN = cast<LifetimeSDNode>(N)) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), 0, - LocationSize::precise(LN->getSize()), + LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())), (MachineMemOperand *)nullptr}; + } // Default. 
return {false /*isvolatile*/, /*isAtomic*/ false, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 74172b2..ba0ab23 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::FP_TO_FP16: LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); - if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + if (Node->getFlags().hasApproximateFuncs() && !TLI.useSoftFloat()) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); if ((SVT == MVT::f64 || SVT == MVT::f80) && diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e5704c0..583a85a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" @@ -357,6 +358,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::PATCHPOINT: Res = PromoteIntRes_PATCHPOINT(N); break; + case ISD::READ_REGISTER: + Res = PromoteIntRes_READ_REGISTER(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -2076,6 +2080,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::PATCHPOINT: Res = PromoteIntOp_PATCHPOINT(N, OpNo); break; + case ISD::WRITE_REGISTER: + Res = PromoteIntOp_WRITE_REGISTER(N, OpNo); + break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = PromoteIntOp_VP_STRIDED(N, OpNo); @@ -2853,6 +2860,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_WRITE_REGISTER(SDNode *N, + unsigned OpNo) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.write_register with illegal type", Fn, + N->getDebugLoc())); + return N->getOperand(0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); @@ -3127,6 +3143,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VSCALE: ExpandIntRes_VSCALE(N, Lo, Hi); break; + + case ISD::READ_REGISTER: + ExpandIntRes_READ_REGISTER(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
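All four new legalizer hooks follow one pattern: emit a DiagnosticInfoLegalizationFailure instead of crashing, then substitute poison for the unlegalizable value, split into two halves in the expand case shown in the next hunk. A standalone sketch of that expand step, modeling GetSplitDestVTs on a scalar integer as a halving of the bit width; every name here is hypothetical:

#include <cstdio>
#include <utility>

struct IntType { unsigned Bits; };

// Model of GetSplitDestVTs for scalar integers: an illegal 2*N-bit result
// expands into low and high halves of N bits each.
std::pair<IntType, IntType> splitDestTypes(IntType VT) {
  return {IntType{VT.Bits / 2}, IntType{VT.Bits / 2}};
}

struct PoisonValue { IntType Ty; };

// Diagnose and carry on with poison rather than asserting.
std::pair<PoisonValue, PoisonValue> expandReadRegister(IntType VT) {
  std::fprintf(stderr,
               "error: cannot use llvm.read_register with illegal type "
               "(i%u)\n",
               VT.Bits);
  auto [LoVT, HiVT] = splitDestTypes(VT);
  return {PoisonValue{LoVT}, PoisonValue{HiVT}};
}

int main() {
  auto [Lo, Hi] = expandReadRegister(IntType{128}); // e.g. i128 on a
                                                    // 64-bit target
  std::printf("lo: i%u poison, hi: i%u poison\n", Lo.Ty.Bits, Hi.Ty.Bits);
  return 0;
}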
@@ -5471,6 +5491,18 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, SplitInteger(Res, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc())); + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getPOISON(LoVT); + Hi = DAG.getPOISON(HiVT); +} + //===----------------------------------------------------------------------===// // Integer Operand Expansion //===----------------------------------------------------------------------===// @@ -5537,6 +5569,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = ExpandIntOp_VP_STRIDED(N, OpNo); break; + case ISD::WRITE_REGISTER: + Res = ExpandIntOp_WRITE_REGISTER(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -5935,6 +5970,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.write_register with illegal type", Fn, + N->getDebugLoc())); + + return N->getOperand(0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) { SDLoc dl(N); @@ -6332,6 +6376,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_PATCHPOINT(SDNode *N) { return Res.getValue(0); } +SDValue DAGTypeLegalizer::PromoteIntRes_READ_REGISTER(SDNode *N) { + const Function &Fn = DAG.getMachineFunction().getFunction(); + Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure( + "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc())); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + return DAG.getPOISON(NVT); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 9b53724..2e13b18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -378,6 +378,7 @@ private: SDValue PromoteIntRes_VPFunnelShift(SDNode *N); SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); SDValue PromoteIntRes_PATCHPOINT(SDNode *N); + SDValue PromoteIntRes_READ_REGISTER(SDNode *N); SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N); SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N); SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N); @@ -428,6 +429,7 @@ private: SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo); @@ -511,6 +513,7 @@ private: void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi); void 
ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); @@ -534,6 +537,7 @@ private: SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo); SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 6a2e782..31e7855 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -888,7 +888,8 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } if (MI->isCandidateForAdditionalCallInfo()) { - if (DAG->getTarget().Options.EmitCallSiteInfo) + if (DAG->getTarget().Options.EmitCallSiteInfo || + DAG->getTarget().Options.EmitCallGraphSection) MF.addCallSiteInfo(MI, DAG->getCallSiteInfo(Node)); if (auto CalledGlobal = DAG->getCalledGlobal(Node)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 773ff48..f41b6eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -784,10 +784,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::TargetFrameIndex: ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); break; - case ISD::LIFETIME_START: - case ISD::LIFETIME_END: - ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); - break; case ISD::PSEUDO_PROBE: ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid()); ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex()); @@ -7847,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } - // Perform trivial constant folding. - if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) - return SV; + if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) { + switch (Opcode) { + case ISD::XOR: + case ISD::ADD: + case ISD::PTRADD: + case ISD::SUB: + case ISD::SIGN_EXTEND_INREG: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::MUL: + case ISD::AND: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::UMIN: + case ISD::OR: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::UMAX: + case ISD::SMAX: + case ISD::SMIN: + // fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison. + return N2.getOpcode() == ISD::POISON ? N2 : N1; + } + } // Canonicalize an UNDEF to the RHS, even over a constant. - if (N1.isUndef()) { + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) { if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { case ISD::PTRADD: case ISD::SUB: - // fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison. - return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(undef, non_undef_arg2) -> undef. + return N1; case ISD::SIGN_EXTEND_INREG: case ISD::UDIV: case ISD::SDIV: @@ -7868,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SREM: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison. - return N1.getOpcode() == ISD::POISON ? 
getPOISON(VT) - : getConstant(0, DL, VT); + // fold op(undef, non_undef_arg2) -> 0. + return getConstant(0, DL, VT); } } } // Fold a bunch of operators when the RHS is undef. - if (N2.isUndef()) { + if (N2.getOpcode() == ISD::UNDEF) { switch (Opcode) { case ISD::XOR: - if (N1.isUndef()) + if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); @@ -7887,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::ADD: case ISD::PTRADD: case ISD::SUB: + // fold op(arg1, undef) -> undef. + return N2; case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: - // fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); + // fold op(arg1, undef) -> poison. + return getPOISON(VT); case ISD::MUL: case ISD::AND: case ISD::SSUBSAT: case ISD::USUBSAT: - // fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getConstant(0, DL, VT); + case ISD::UMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0. + return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT); case ISD::OR: case ISD::SADDSAT: case ISD::UADDSAT: - // fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) -> - // poison. - return N2.getOpcode() == ISD::POISON ? getPOISON(VT) - : getAllOnesConstant(DL, VT); + case ISD::UMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1. + return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT); + case ISD::SMAX: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL, + VT); + case ISD::SMIN: + // fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT. + return N1.getOpcode() == ISD::UNDEF + ? N2 + : getConstant( + APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL, + VT); } } + // Perform trivial constant folding. + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) + return SV; + // Memoize this node if possible. SDNode *N; SDVTList VTs = getVTList(VT); @@ -9360,8 +9397,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, - SDValue Chain, int FrameIndex, - int64_t Size) { + SDValue Chain, int FrameIndex) { const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END; const auto VTs = getVTList(MVT::Other); SDValue Ops[2] = { @@ -9373,13 +9409,12 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); ID.AddInteger(FrameIndex); - ID.AddInteger(Size); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - LifetimeSDNode *N = newSDNode<LifetimeSDNode>(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTs, Size); + LifetimeSDNode *N = + newSDNode<LifetimeSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs); createOperands(N, Ops); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -12747,7 +12782,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { return Seen; } -/// isOperand - Return true if this node is an operand of N. +/// Return true if the referenced return value is an operand of N. 
bool SDValue::isOperandOf(const SDNode *N) const { return is_contained(N->op_values(), *this); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1636465..306e068 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); + SDNodeFlags Flags; + if (auto *TruncInst = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*TruncInst); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, DAG.getTargetConstant( - 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); + 0, dl, TLI.getPointerTy(DAG.getDataLayout())), + Flags)); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -7594,8 +7598,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (TM.getOptLevel() == CodeGenOptLevel::None) return; - const int64_t ObjectSize = - cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); const AllocaInst *LifetimeObject = cast<AllocaInst>(I.getArgOperand(1)); // First check that the Alloca is static, otherwise it won't have a @@ -7605,7 +7607,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; const int FrameIndex = SI->second; - Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize); + Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex); DAG.setRoot(Res); return; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9474587..900da76 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -946,8 +946,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << " -> " << ASC->getDestAddressSpace() << ']'; - } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) { - OS << "<0 to " << LN->getSize() << ">"; } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) { OS << '<' << AA->getAlign().value() << '>'; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1764910..48d6b99 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9471,7 +9471,7 @@ SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG, ISD::SRL, DL, VT, DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg), DAG.getConstant(DeBruijn, DL, VT)), - DAG.getConstant(ShiftAmt, DL, VT)); + DAG.getShiftAmountConstant(ShiftAmt, VT, DL)); Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD)); SmallVector<uint8_t> Table(BitWidth, 0); diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index b79911b..2a8234a 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -588,7 +588,14 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F, continue; Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator()); if (!CheckLoc && !DisableCheckNoReturn) - for (auto &Inst : BB) + for (auto &Inst : BB) { + if (IntrinsicInst *IB = dyn_cast<IntrinsicInst>(&Inst); + IB && (IB->getIntrinsicID() == 
Intrinsic::eh_sjlj_callsite)) { + // eh_sjlj_callsite has to be in the same BB as the + // BB terminator. Don't insert within this range. + CheckLoc = IB; + break; + } if (auto *CB = dyn_cast<CallBase>(&Inst)) // Do stack check before noreturn calls that aren't nounwind (e.g: // __cxa_throw). @@ -596,6 +603,7 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F, CheckLoc = CB; break; } + } if (!CheckLoc) continue; diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index a88c57f..5d720fb 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -604,12 +604,21 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, bool HasComputedGoto = false; if (!TailBB.empty()) { HasIndirectbr = TailBB.back().isIndirectBranch(); - HasComputedGoto = TailBB.terminatorIsComputedGoto(); + HasComputedGoto = TailBB.terminatorIsComputedGotoWithSuccessors(); } if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = TailDupIndirectBranchSize; + // Allow higher limits when the block has computed gotos and we are running + // after register allocation. NB. This basically unfactors computed gotos + // that were factored early on in the compilation process to speed up edge + // based data flow. If we do not unfactor them again, it can seriously + // pessimize code with many computed jumps in the source code, such as + // interpreters. Therefore we do not restrict the computed gotos. + if (HasComputedGoto && !PreRegAlloc) + MaxDuplicateCount = std::max(MaxDuplicateCount, 10u); + // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; @@ -663,12 +672,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // Duplicating a BB which has both multiple predecessors and successors may // cause a huge amount of PHI nodes. If we want to remove this limitation, // we have to address https://github.com/llvm/llvm-project/issues/78578. - // NB. This basically unfactors computed gotos that were factored early on in - // the compilation process to speed up edge based data flow. If we do not - // unfactor them again, it can seriously pessimize code with many computed - // jumps in the source code, such as interpreters. Therefore we do not - // restrict the computed gotos. - if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize && + if (PreRegAlloc && TailBB.pred_size() > TailDupPredSize && TailBB.succ_size() > TailDupSuccSize) { // If TailBB or any of its successors contains a phi, we may have to add a // large number of additional phis with additional incoming values.
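The net effect in shouldTailDuplicate is a small decision table: indirect branches keep the tight pre-RA limit, computed gotos get a floor of ten duplicated instructions after register allocation, and the predecessor/successor fan-out guard now constrains only pre-RA duplication. A standalone sketch of that policy; the constant standing in for TailDupIndirectBranchSize is made up:

#include <algorithm>

struct DupPolicy {
  unsigned MaxCount;
  bool CheckFanOut; // whether the pred/succ size limits apply
};

DupPolicy tailDupPolicy(bool PreRegAlloc, bool HasIndirectBr,
                        bool HasComputedGoto, unsigned DefaultLimit) {
  const unsigned TailDupIndirectBranchSize = 20; // made-up stand-in
  unsigned MaxCount = DefaultLimit;
  if (HasIndirectBr && PreRegAlloc)
    MaxCount = TailDupIndirectBranchSize;
  // Post-RA computed gotos get a higher floor so interpreter-style
  // dispatch blocks can still be unfactored.
  if (HasComputedGoto && !PreRegAlloc)
    MaxCount = std::max(MaxCount, 10u);
  // The fan-out guard only constrains pre-RA duplication now.
  return {MaxCount, /*CheckFanOut=*/PreRegAlloc};
}

int main() {
  DupPolicy P = tailDupPolicy(/*PreRegAlloc=*/false, /*HasIndirectBr=*/true,
                              /*HasComputedGoto=*/true, /*DefaultLimit=*/2);
  return (P.MaxCount == 10 && !P.CheckFanOut) ? 0 : 1;
}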
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 18d6bbc..705e046e 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1406,7 +1406,7 @@ void TargetInstrInfo::reassociateOps( const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder( MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true)) - .setPCSections(MIMD.getPCSections()) + .copyMIMetadata(MIMD) .addReg(DestReg, RegState::Define); }; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index d4a3455..3c91b0e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() { ISD::SDIVFIX, ISD::SDIVFIXSAT, ISD::UDIVFIX, ISD::UDIVFIXSAT, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, - ISD::IS_FPCLASS}, + ISD::IS_FPCLASS, ISD::FCBRT, + ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, + ISD::FEXP2, ISD::FEXP10, + ISD::FFLOOR, ISD::FNEARBYINT, + ISD::FCEIL, ISD::FRINT, + ISD::FTRUNC, ISD::FROUNDEVEN, + ISD::FTAN, ISD::FACOS, + ISD::FASIN, ISD::FATAN, + ISD::FCOSH, ISD::FSINH, + ISD::FTANH, ISD::FATAN2}, VT, Expand); // Overflow operations default to expand @@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT.isVector()) - setOperationAction( - {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, - ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND, - ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, - ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2}, - VT, Expand); + setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, + ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, + ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR, + ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND}, + VT, Expand); // Constrained floating-point operations default to expand. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ @@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() { {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, Expand); - // These library functions default to expand. - setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, - ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, - ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, - ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, - ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH, - ISD::FATAN2}, - {MVT::f32, MVT::f64, MVT::f128}, Expand); - // Insert custom handling default for llvm.canonicalize.*. 
setOperationAction(ISD::FCANONICALIZE, {MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand); @@ -2062,7 +2062,7 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { // FreeBSD has "__stack_chk_guard" defined externally on libc.so if (M.getDirectAccessExternalData() && - !TM.getTargetTriple().isWindowsGNUEnvironment() && + !TM.getTargetTriple().isOSCygMing() && !(TM.getTargetTriple().isPPC64() && TM.getTargetTriple().isOSFreeBSD()) && (!TM.getTargetTriple().isOSDarwin() || diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a40ceaa..e9172f4 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -995,7 +995,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA( if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections())) return LSDASection; - const auto *LSDA = cast<MCSectionELF>(LSDASection); + const auto *LSDA = static_cast<const MCSectionELF *>(LSDASection); unsigned Flags = LSDA->getFlags(); const MCSymbolELF *LinkedToSym = nullptr; StringRef Group; @@ -1060,27 +1060,27 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( auto &Context = getContext(); if (Kind.isMergeableConst4() && MergeableConst4Section) - return Context.getELFSection(".rodata.cst4." + SectionSuffix, + return Context.getELFSection(".rodata.cst4." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 4); if (Kind.isMergeableConst8() && MergeableConst8Section) - return Context.getELFSection(".rodata.cst8." + SectionSuffix, + return Context.getELFSection(".rodata.cst8." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 8); if (Kind.isMergeableConst16() && MergeableConst16Section) - return Context.getELFSection(".rodata.cst16." + SectionSuffix, + return Context.getELFSection(".rodata.cst16." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 16); if (Kind.isMergeableConst32() && MergeableConst32Section) - return Context.getELFSection(".rodata.cst32." + SectionSuffix, + return Context.getELFSection(".rodata.cst32." + SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE, 32); if (Kind.isReadOnly()) - return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); + return Context.getELFSection(".rodata." + SectionSuffix + ".", + ELF::SHT_PROGBITS, ELF::SHF_ALLOC); assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return Context.getELFSection(".data.rel.ro." + SectionSuffix, + return Context.getELFSection(".data.rel.ro." 
+ SectionSuffix + ".", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE); } @@ -1734,7 +1734,8 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( Name == getInstrProfSectionName(IPSK_covdata, Triple::COFF, /*AddSegmentInfo=*/false) || Name == getInstrProfSectionName(IPSK_covname, Triple::COFF, - /*AddSegmentInfo=*/false)) + /*AddSegmentInfo=*/false) || + Name == ".llvmbc" || Name == ".llvmcmd") Kind = SectionKind::getMetadata(); int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); @@ -2054,14 +2055,14 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getCOFFStaticStructorSection( getContext(), getContext().getTargetTriple(), true, Priority, KeySym, - cast<MCSectionCOFF>(StaticCtorSection)); + static_cast<MCSectionCOFF *>(StaticCtorSection)); } MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getCOFFStaticStructorSection( getContext(), getContext().getTargetTriple(), false, Priority, KeySym, - cast<MCSectionCOFF>(StaticDtorSection)); + static_cast<MCSectionCOFF *>(StaticDtorSection)); } const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( @@ -2388,23 +2389,25 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, // here. if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) { if (GO->isDeclarationForLinker()) - return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM)) + return static_cast<const MCSectionXCOFF *>( + getSectionForExternalReference(GO, TM)) ->getQualNameSymbol(); if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->hasAttribute("toc-data")) - return cast<MCSectionXCOFF>( + return static_cast<const MCSectionXCOFF *>( SectionForGlobal(GVar, SectionKind::getData(), TM)) ->getQualNameSymbol(); SectionKind GOKind = getKindForGlobal(GO, TM); if (GOKind.isText()) - return cast<MCSectionXCOFF>( + return static_cast<const MCSectionXCOFF *>( getSectionForFunctionDescriptor(cast<Function>(GO), TM)) ->getQualNameSymbol(); if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() || GOKind.isBSSLocal() || GOKind.isThreadBSSLocal()) - return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM)) + return static_cast<const MCSectionXCOFF *>( + SectionForGlobal(GO, GOKind, TM)) ->getQualNameSymbol(); } @@ -2740,7 +2743,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA( const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { - auto *LSDA = cast<MCSectionXCOFF>(LSDASection); + auto *LSDA = static_cast<MCSectionXCOFF *>(LSDASection); if (TM.getFunctionSections()) { // If option -ffunction-sections is on, append the function name to the // name of the LSDA csect so that each function has its own LSDA csect. diff --git a/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp b/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp index 6267207..fd54190 100644 --- a/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp +++ b/llvm/lib/CodeGen/WindowsSecureHotPatching.cpp @@ -369,6 +369,19 @@ static GlobalVariable *getOrCreateRefVariable( AddrOfOldGV, Twine("__ref_").concat(GV->getName()), nullptr, GlobalVariable::NotThreadLocal); + // RefGV is created with isConstant = false, but we want to place RefGV into + // .rdata, not .data. 
It is important that the GlobalVariable be mutable + // from the compiler's point of view, so that the optimizer does not remove + // the global variable entirely and replace all references to it with its + // initial value. + // + // When the Windows hot-patch loader applies a hot-patch, it maps the + // pages of .rdata as read/write so that it can set each __ref_* variable + // to point to the original variable in the base image. Afterward, pages in + // .rdata are remapped as read-only. This protects the __ref_* variables from + // being overwritten during execution. + RefGV->setSection(".rdata"); + // Create debug info for the replacement global variable. DataLayout Layout = M->getDataLayout(); DIType *DebugType = DebugInfo.createPointerType(