Diffstat (limited to 'llvm/lib')
20 files changed, 190 insertions, 114 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
   Type *MidTy = CI->getType();
   Type *DstTy = Ty;
   if (Src->getType() == Ty) {
-    auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+    auto FirstOp = CI->getOpcode();
     auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
     Type *SrcIntPtrTy =
         SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 11b8576..7188833 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -972,10 +972,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
     // the call graph which could lead to some target function. For tail
     // calls, no return PC information is needed, unless tuning for GDB in
     // DWARF4 mode in which case we fake a return PC for compatibility.
-    const MCSymbol *PCAddr =
-        (!IsTail || CU.useGNUAnalogForDwarf5Feature())
-            ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
-            : nullptr;
+    const MCSymbol *PCAddr = (!IsTail || CU.useGNUAnalogForDwarf5Feature())
+                                 ? getLabelAfterInsn(TopLevelCallMI)
+                                 : nullptr;
 
     // For tail calls, it's necessary to record the address of the branch
     // instruction so that the debugger can show where the tail call occurred.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 840ca83..7928772 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
 Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
     const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
     AttributeList FuncAttrs) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
+  InsertPointTy SavedIP = Builder.saveIP();
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,7 +2630,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
   WcFunc->addParamAttr(1, Attribute::NoUndef);
   BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
   Builder.SetInsertPoint(EntryBB);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // ReduceList: thread local Reduce list.
   // At the stage of the computation when this function is called, partially
@@ -2845,6 +2844,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
   }
 
   Builder.CreateRetVoid();
+  Builder.restoreIP(SavedIP);
   return WcFunc;
 }
 
@@ -2853,7 +2853,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
     AttributeList FuncAttrs) {
   LLVMContext &Ctx = M.getContext();
-  IRBuilder<>::InsertPointGuard IPG(Builder);
   FunctionType *FuncTy =
       FunctionType::get(Builder.getVoidTy(),
                         {Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2872,7 +2871,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
   SarFunc->addParamAttr(3, Attribute::SExt);
   BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
   Builder.SetInsertPoint(EntryBB);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Thread local Reduce list used to host the values of data to be reduced.
   Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3019,7 +3017,7 @@
 Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
     AttributeList FuncAttrs) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
+  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3035,7 +3033,6 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
   Builder.SetInsertPoint(EntryBlock);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3123,13 +3120,14 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
   }
 
   Builder.CreateRetVoid();
+  Builder.restoreIP(OldIP);
   return LtGCFunc;
 }
 
 Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
     Type *ReductionsBufferTy, AttributeList FuncAttrs) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
+  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3145,7 +3143,6 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
   Builder.SetInsertPoint(EntryBlock);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3206,13 +3203,14 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
   Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
       ->addFnAttr(Attribute::NoUnwind);
   Builder.CreateRetVoid();
+  Builder.restoreIP(OldIP);
   return LtGRFunc;
 }
 
 Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
     AttributeList FuncAttrs) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
+  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
   LLVMContext &Ctx = M.getContext();
   FunctionType *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3228,7 +3226,6 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
   Builder.SetInsertPoint(EntryBlock);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3314,13 +3311,14 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
   }
 
   Builder.CreateRetVoid();
+  Builder.restoreIP(OldIP);
   return LtGCFunc;
 }
 
 Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
     ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
     Type *ReductionsBufferTy, AttributeList FuncAttrs) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
+  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
   LLVMContext &Ctx = M.getContext();
   auto *FuncTy = FunctionType::get(
       Builder.getVoidTy(),
@@ -3336,7 +3334,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
 
   BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
   Builder.SetInsertPoint(EntryBlock);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Buffer: global reduction buffer.
   Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3397,6 +3394,7 @@
   Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
       ->addFnAttr(Attribute::NoUnwind);
   Builder.CreateRetVoid();
+  Builder.restoreIP(OldIP);
   return LtGRFunc;
 }
 
@@ -3409,7 +3407,6 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
 Expected<Function *> OpenMPIRBuilder::createReductionFunction(
     StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
     ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
   auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
                                    {Builder.getPtrTy(), Builder.getPtrTy()},
                                    /* IsVarArg */ false);
@@ -3422,7 +3419,6 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
   BasicBlock *EntryBB =
       BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
   Builder.SetInsertPoint(EntryBB);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
 
   // Need to alloca memory here and deal with the pointers before getting
   // LHS/RHS pointers out
@@ -3750,12 +3746,10 @@ static Error populateReductionFunction(
     Function *ReductionFunc,
     ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
    IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
-  IRBuilder<>::InsertPointGuard IPG(Builder);
   Module *Module = ReductionFunc->getParent();
   BasicBlock *ReductionFuncBlock =
       BasicBlock::Create(Module->getContext(), "", ReductionFunc);
   Builder.SetInsertPoint(ReductionFuncBlock);
-  Builder.SetCurrentDebugLocation(llvm::DebugLoc());
   Value *LHSArrayPtr = nullptr;
   Value *RHSArrayPtr = nullptr;
   if (IsGPU) {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index dbb2fd1..c24c82d 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -346,17 +346,16 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
       Displacement *= -1;
     }
 
-    // Track whether B is before a relaxable instruction and whether A is after
-    // a relaxable instruction. If SA and SB are separated by a linker-relaxable
-    // instruction, the difference cannot be resolved as it may be changed by
-    // the linker.
+    // Track whether B is before a relaxable instruction/alignment and whether A
+    // is after a relaxable instruction/alignment. If SA and SB are separated by
+    // a linker-relaxable instruction/alignment, the difference cannot be
+    // resolved as it may be changed by the linker.
     bool BBeforeRelax = false, AAfterRelax = false;
     for (auto F = FB; F; F = F->getNext()) {
-      auto DF = F->getKind() == MCFragment::FT_Data ? F : nullptr;
-      if (DF && DF->isLinkerRelaxable()) {
-        if (&*F != FB || SBOffset != DF->getContents().size())
+      if (F && F->isLinkerRelaxable()) {
+        if (&*F != FB || SBOffset != F->getSize())
           BBeforeRelax = true;
-        if (&*F != FA || SAOffset == DF->getContents().size())
+        if (&*F != FA || SAOffset == F->getSize())
           AAfterRelax = true;
         if (BBeforeRelax && AAfterRelax)
           return;
@@ -370,17 +369,15 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
       }
 
       int64_t Num;
-      if (DF) {
-        Displacement += DF->getContents().size();
-      } else if (F->getKind() == MCFragment::FT_Relaxable &&
+      if (F->getKind() == MCFragment::FT_Data) {
+        Displacement += F->getFixedSize();
+      } else if ((F->getKind() == MCFragment::FT_Relaxable ||
+                  F->getKind() == MCFragment::FT_Align) &&
                  Asm->hasFinalLayout()) {
         // Before finishLayout, a relaxable fragment's size is indeterminate.
         // After layout, during relocation generation, it can be treated as a
         // data fragment.
         Displacement += F->getSize();
-      } else if (F->getKind() == MCFragment::FT_Align && Layout &&
-                 F->isLinkerRelaxable()) {
-        Displacement += Asm->computeFragmentSize(*F);
       } else if (auto *FF = dyn_cast<MCFillFragment>(F);
                  FF && FF->getNumValues().evaluateAsAbsolute(Num)) {
         Displacement += Num * FF->getValueSize();
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index f046552..9c7b05b 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -340,31 +340,33 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
   MCFragment *F = getCurrentFragment();
 
   // Append the instruction to the data fragment.
-  size_t FixupStartIndex = F->getFixups().size();
   size_t CodeOffset = F->getContents().size();
   SmallVector<MCFixup, 1> Fixups;
   getAssembler().getEmitter().encodeInstruction(
       Inst, F->getContentsForAppending(), Fixups, STI);
   F->doneAppending();
-  if (!Fixups.empty())
-    F->appendFixups(Fixups);
   F->setHasInstructions(STI);
+  if (Fixups.empty())
+    return;
   bool MarkedLinkerRelaxable = false;
-  for (auto &Fixup : MutableArrayRef(F->getFixups()).slice(FixupStartIndex)) {
+  for (auto &Fixup : Fixups) {
     Fixup.setOffset(Fixup.getOffset() + CodeOffset);
-    if (!Fixup.isLinkerRelaxable())
+    if (!Fixup.isLinkerRelaxable() || MarkedLinkerRelaxable)
       continue;
-    F->setLinkerRelaxable();
+    MarkedLinkerRelaxable = true;
+    // Set the fragment's order within the subsection for use by
+    // MCAssembler::relaxAlign.
+    auto *Sec = F->getParent();
+    if (!Sec->isLinkerRelaxable())
+      Sec->setLinkerRelaxable();
     // Do not add data after a linker-relaxable instruction. The difference
     // between a new label and a label at or before the linker-relaxable
     // instruction cannot be resolved at assemble-time.
-    if (!MarkedLinkerRelaxable) {
-      MarkedLinkerRelaxable = true;
-      getCurrentSectionOnly()->setLinkerRelaxable();
-      newFragment();
-    }
+    F->setLinkerRelaxable();
+    newFragment();
   }
+  F->appendFixups(Fixups);
 }
 
 void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index 2579fa3..0f19495 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -8,11 +8,11 @@
 
 #include "llvm/Object/IRSymtab.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Comdat.h"
@@ -213,9 +213,10 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
   return P.first->second;
 }
 
-static DenseSet<StringRef> buildPreservedSymbolsSet(const Triple &TT) {
-  DenseSet<StringRef> PreservedSymbolSet(std::begin(PreservedSymbols),
-                                         std::end(PreservedSymbols));
+static StringSet<> buildPreservedSymbolsSet(const Triple &TT) {
+  StringSet<> PreservedSymbolSet;
+  PreservedSymbolSet.insert(std::begin(PreservedSymbols),
+                            std::end(PreservedSymbols));
   // FIXME: Do we need to pass in ABI fields from TargetOptions?
   RTLIB::RuntimeLibcallsInfo Libcalls(TT);
   for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) {
@@ -280,7 +281,7 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
 
   setStr(Sym.IRName, GV->getName());
 
-  static const DenseSet<StringRef> PreservedSymbolsSet =
+  static const StringSet<> PreservedSymbolsSet =
       buildPreservedSymbolsSet(GV->getParent()->getTargetTriple());
   bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 613cfb5..d96136c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2385,13 +2385,6 @@ SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
   return Res;
 }
 
-static bool isConstantBUILD_VECTOR(const BuildVectorSDNode *Op) {
-  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
-    if (isIntOrFPConstant(Op->getOperand(i)))
-      return true;
-  return false;
-}
-
 // Lower BUILD_VECTOR as broadcast load (if possible).
 // For example:
 //   %a = load i8, ptr %ptr
@@ -2441,10 +2434,14 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                    SelectionDAG &DAG) const {
   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
   EVT ResTy = Op->getValueType(0);
+  unsigned NumElts = ResTy.getVectorNumElements();
   SDLoc DL(Op);
   APInt SplatValue, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
+  bool IsConstant = false;
+  bool UseSameConstant = true;
+  SDValue ConstantValue;
   bool Is128Vec = ResTy.is128BitVector();
   bool Is256Vec = ResTy.is256BitVector();
 
@@ -2495,13 +2492,35 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
     return Op;
 
-  if (!isConstantBUILD_VECTOR(Node)) {
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Opi = Node->getOperand(i);
+    if (isIntOrFPConstant(Opi)) {
+      IsConstant = true;
+      if (!ConstantValue.getNode())
+        ConstantValue = Opi;
+      else if (ConstantValue != Opi)
+        UseSameConstant = false;
+    }
+  }
+
+  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
+  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
+    SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      SDValue Opi = Node->getOperand(i);
+      if (!isIntOrFPConstant(Opi))
+        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
+                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+    }
+    return Result;
+  }
+
+  if (!IsConstant) {
     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
     // The resulting code is the same length as the expansion, but it doesn't
     // use memory operations.
     assert(ResTy.isVector());
 
-    unsigned NumElts = ResTy.getVectorNumElements();
     SDValue Op0 = Node->getOperand(0);
     SDValue Vector = DAG.getUNDEF(ResTy);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 8fa72bc..d9ea88c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -254,6 +254,7 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
   MCFixup Fixup =
       MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
   F.setVarFixups({Fixup});
+  F.setLinkerRelaxable();
   F.getParent()->setLinkerRelaxable();
   return true;
 }
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index ca03310..a2e48ab 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -737,14 +737,18 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) {
   if (FS.empty() && M.size() && F->hasFnAttribute("target-features"))
     FS = F->getFnAttribute("target-features").getValueAsString();
 
+  std::string strFS = FS.str();
+  if (M.size() && F->getFnAttribute("use-soft-float").getValueAsBool())
+    strFS += strFS.empty() ? "+soft-float" : ",+soft-float";
+
   // Compute MIPS architecture attributes based on the default subtarget
   // that we'd have constructed.
   // FIXME: For ifunc related functions we could iterate over and look
   // for a feature string that doesn't match the default one.
   StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU());
   const MipsTargetMachine &MTM = static_cast<const MipsTargetMachine &>(TM);
-  const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM,
-                          std::nullopt);
+  const MipsSubtarget STI(TT, CPU, StringRef(strFS), MTM.isLittleEndian(), MTM,
+                          std::nullopt);
 
   bool IsABICalls = STI.isABICalls();
   const MipsABIInfo &ABI = MTM.getABI();
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 2c37c3b..82e3b5c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -320,6 +320,7 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
   MCFixup Fixup =
      MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
   F.setVarFixups({Fixup});
+  F.setLinkerRelaxable();
   F.getParent()->setLinkerRelaxable();
   return true;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b1ab76a..9fc0d81 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1581,7 +1581,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
     // Set the register and all its subregisters.
     if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) {
       SavedRegs.set(CSReg);
-      llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); });
+      for (unsigned Reg : SubRegs)
+        SavedRegs.set(Reg);
     }
 
     // Combine to super register if all of its subregisters are marked.
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 17c9833..d6afb8a 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -858,16 +858,15 @@ void RISCVISAInfo::updateImplication() {
     StringRef ExtName = WorkList.pop_back_val();
     auto Range = std::equal_range(std::begin(ImpliedExts),
                                   std::end(ImpliedExts), ExtName);
-    std::for_each(Range.first, Range.second,
-                  [&](const ImpliedExtsEntry &Implied) {
-                    const char *ImpliedExt = Implied.ImpliedExt;
-                    auto [It, Inserted] = Exts.try_emplace(ImpliedExt);
-                    if (!Inserted)
-                      return;
-                    auto Version = findDefaultVersion(ImpliedExt);
-                    It->second = *Version;
-                    WorkList.push_back(ImpliedExt);
-                  });
+    for (const ImpliedExtsEntry &Implied : llvm::make_range(Range)) {
+      const char *ImpliedExt = Implied.ImpliedExt;
+      auto [It, Inserted] = Exts.try_emplace(ImpliedExt);
+      if (!Inserted)
+        continue;
+      auto Version = findDefaultVersion(ImpliedExt);
+      It->second = *Version;
+      WorkList.push_back(ImpliedExt);
+    }
   }
 
   // Add Zcd if C and D are enabled.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a53ccdd..6616e61f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10076,12 +10076,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   // Get user vectorization factor and interleave count.
   ElementCount UserVF = Hints.getWidth();
   unsigned UserIC = Hints.getInterleave();
-  if (LVL.hasUncountableEarlyExit() && UserIC != 1) {
-    UserIC = 1;
-    reportVectorizationInfo("Interleaving not supported for loops "
-                            "with uncountable early exits",
-                            "InterleaveEarlyExitDisabled", ORE, L);
-  }
 
   // Plan how to best vectorize.
   LVP.plan(UserVF, UserIC);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6655149..a5de593 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,10 @@ public:
     ReductionStartVector,
     // Creates a step vector starting from 0 to VF with a step of 1.
     StepVector,
+    /// Extracts a single lane (first operand) from a set of vector operands.
+    /// The lane specifies an index into a vector formed by combining all vector
+    /// operands (all operands after the first one).
+    ExtractLane,
 
   };
@@ -1837,6 +1841,10 @@ public:
                                getGEPNoWrapFlags(), getDebugLoc());
   }
 
+  /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
+  /// this is only accurate after the VPlan has been unrolled.
+  bool isFirstPart() const { return getUnrollPart(*this) == 0; }
+
   /// Return the cost of this VPHeaderPHIRecipe.
   InstructionCost computeCost(ElementCount VF,
                               VPCostContext &Ctx) const override {
@@ -2304,14 +2312,15 @@ public:
   /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
   /// be omitted (implied by passing an odd number of operands) in which case
   /// all other incoming values are merged into it.
-  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
-      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
+  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
+      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
     assert(Operands.size() > 0 && "Expected at least one operand!");
   }
 
   VPBlendRecipe *clone() override {
     SmallVector<VPValue *> Ops(operands());
-    return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
+    return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), Ops,
+                             getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPBlendSC)
@@ -4056,6 +4065,10 @@ public:
   /// Returns VF * UF of the vector loop region.
   VPValue &getVFxUF() { return VFxUF; }
 
+  LLVMContext &getContext() const {
+    return getScalarHeader()->getIRBasicBlock()->getContext();
+  }
+
   void addVF(ElementCount VF) { VFs.insert(VF); }
 
   void setVF(ElementCount VF) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 3499e65..16072f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -110,6 +110,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   case VPInstruction::BuildStructVector:
   case VPInstruction::BuildVector:
     return SetResultTyFromOp();
+  case VPInstruction::ExtractLane:
+    return inferScalarType(R->getOperand(1));
  case VPInstruction::FirstActiveLane:
     return Type::getIntNTy(Ctx, 64);
   case VPInstruction::ExtractLastElement:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 5319b8c..6c1f53b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -466,28 +466,27 @@ void VPlanTransforms::prepareForVectorization(
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
 
-  VPBlockBase *HeaderVPB = Plan.getEntry()->getSingleSuccessor();
-  canonicalHeaderAndLatch(HeaderVPB, VPDT);
-  VPBlockBase *LatchVPB = HeaderVPB->getPredecessors()[1];
+  auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
+  canonicalHeaderAndLatch(HeaderVPBB, VPDT);
+  auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
 
   VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
   VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
 
   VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
-  // The canonical LatchVPB has the header block as last successor. If it has
+  // The canonical LatchVPBB has the header block as last successor. If it has
   // another successor, this successor is an exit block - insert middle block on
   // its edge. Otherwise, add middle block as another successor retaining header
   // as last.
-  if (LatchVPB->getNumSuccessors() == 2) {
-    VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0];
-    VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB);
+  if (LatchVPBB->getNumSuccessors() == 2) {
+    VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
+    VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
   } else {
-    VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
-    LatchVPB->swapSuccessors();
+    VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
+    LatchVPBB->swapSuccessors();
   }
 
-  addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
-                        cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
+  addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
 
   [[maybe_unused]] bool HandledUncountableEarlyExit = false;
   // Disconnect all early exits from the loop leaving it with a single exit from
@@ -503,8 +502,7 @@ void VPlanTransforms::prepareForVectorization(
         assert(!HandledUncountableEarlyExit &&
                "can handle exactly one uncountable early exit");
         handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
-                                   cast<VPBasicBlock>(HeaderVPB),
-                                   cast<VPBasicBlock>(LatchVPB), Range);
+                                   HeaderVPBB, LatchVPBB, Range);
         HandledUncountableEarlyExit = true;
       } else {
         for (VPRecipeBase &R : EB->phis())
@@ -568,15 +566,15 @@ void VPlanTransforms::prepareForVectorization(
   // the corresponding compare because they may have ended up with different
   // line numbers and we want to avoid awkward line stepping while debugging.
   // E.g., if the compare has got a line number inside the loop.
-  DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
+  DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
   VPBuilder Builder(MiddleVPBB);
   VPValue *Cmp;
   if (!RequiresScalarEpilogueCheck)
-    Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
-        IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+    Cmp = Plan.getOrAddLiveIn(
+        ConstantInt::getFalse(IntegerType::getInt1Ty(Plan.getContext())));
   else if (TailFolded)
-    Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
-        IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+    Cmp = Plan.getOrAddLiveIn(
+        ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
   else
     Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
                              &Plan.getVectorTripCount(), LatchDL, "cmp.n");
@@ -650,7 +648,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
           .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
                         Plan.getCanonicalIV()->getDebugLoc());
   if (AddBranchWeights) {
-    MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+    MDBuilder MDB(Plan.getContext());
     MDNode *BranchWeights =
         MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
     Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index fc8458c..3b3bbc3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -251,8 +251,9 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
       }
       OperandsWithMask.push_back(EdgeMask);
     }
-    PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue());
-    auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
+    PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
+    auto *Blend =
+        new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
     Builder.insert(Blend);
     PhiR->replaceAllUsesWith(Blend);
     PhiR->eraseFromParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d6152b..225658b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -427,6 +427,7 @@ unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(const VPUser &U) const {
 }
 
 namespace llvm {
+template class VPUnrollPartAccessor<1>;
 template class VPUnrollPartAccessor<2>;
 template class VPUnrollPartAccessor<3>;
 }
@@ -863,6 +864,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
       Res = Builder.CreateOr(Res, State.get(Op));
     return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
   }
+  case VPInstruction::ExtractLane: {
+    Value *LaneToExtract = State.get(getOperand(0), true);
+    Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
+    Value *Res = nullptr;
+    Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
+
+    for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+      Value *VectorStart =
+          Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
+      Value *VectorIdx = Idx == 1
+                             ? LaneToExtract
+                             : Builder.CreateSub(LaneToExtract, VectorStart);
+      Value *Ext = State.VF.isScalar()
+                       ? State.get(getOperand(Idx))
+                       : Builder.CreateExtractElement(
+                             State.get(getOperand(Idx)), VectorIdx);
+      if (Res) {
+        Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
+        Res = Builder.CreateSelect(Cmp, Ext, Res);
+      } else {
+        Res = Ext;
+      }
+    }
+    return Res;
+  }
   case VPInstruction::FirstActiveLane: {
     if (getNumOperands() == 1) {
       Value *Mask = State.get(getOperand(0));
@@ -921,7 +947,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
   }
 
   switch (getOpcode()) {
-  case Instruction::ExtractElement: {
+  case Instruction::ExtractElement:
+  case VPInstruction::ExtractLane: {
     // Add on the cost of extracting the element.
     auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
     return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
@@ -983,6 +1010,7 @@ bool VPInstruction::isVectorToScalar() const {
   return getOpcode() == VPInstruction::ExtractLastElement ||
          getOpcode() == VPInstruction::ExtractPenultimateElement ||
          getOpcode() == Instruction::ExtractElement ||
+         getOpcode() == VPInstruction::ExtractLane ||
         getOpcode() == VPInstruction::FirstActiveLane ||
          getOpcode() == VPInstruction::ComputeAnyOfResult ||
          getOpcode() == VPInstruction::ComputeFindIVResult ||
@@ -1048,6 +1076,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::BuildVector:
   case VPInstruction::CalculateTripCountMinusVF:
   case VPInstruction::CanonicalIVIncrementForPart:
+  case VPInstruction::ExtractLane:
   case VPInstruction::ExtractLastElement:
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::FirstActiveLane:
@@ -1097,6 +1126,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   case VPInstruction::ComputeAnyOfResult:
   case VPInstruction::ComputeFindIVResult:
     return Op == getOperand(1);
+  case VPInstruction::ExtractLane:
+    return Op == getOperand(0);
   };
   llvm_unreachable("switch should return");
 }
@@ -1176,6 +1207,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::BuildVector:
     O << "buildvector";
     break;
+  case VPInstruction::ExtractLane:
+    O << "extract-lane";
+    break;
   case VPInstruction::ExtractLastElement:
     O << "extract-last-element";
     break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3d8e149..935a4e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -774,10 +774,10 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
   using namespace VPlanPatternMatch;
 
   VPValue *Incoming, *Mask;
-  if (!match(Op, m_VPInstruction<Instruction::ExtractElement>(
-                     m_VPValue(Incoming),
+  if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
                      m_VPInstruction<VPInstruction::FirstActiveLane>(
-                         m_VPValue(Mask)))))
+                         m_VPValue(Mask)),
+                     m_VPValue(Incoming))))
     return nullptr;
 
   auto *WideIV = getOptimizableIVOf(Incoming);
@@ -1172,6 +1172,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   if (!Plan->isUnrolled())
     return;
 
+  // VPVectorPointer for part 0 can be replaced by their start pointer.
+  if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+    if (VecPtr->isFirstPart()) {
+      VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
+      return;
+    }
+  }
+
   // VPScalarIVSteps for part 0 can be replaced by their start value, if only
   // the first lane is demanded.
   if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
@@ -1307,8 +1315,9 @@ static void simplifyBlends(VPlan &Plan) {
         OperandsWithMask.push_back(Blend->getMask(I));
       }
 
-      auto *NewBlend = new VPBlendRecipe(
-          cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+      auto *NewBlend =
+          new VPBlendRecipe(cast_or_null<PHINode>(Blend->getUnderlyingValue()),
+                            OperandsWithMask, Blend->getDebugLoc());
       NewBlend->insertBefore(&R);
 
       VPValue *DeadMask = Blend->getMask(StartIndex);
@@ -1361,7 +1370,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
     unsigned NewBitWidth =
         ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF);
 
-    LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
+    LLVMContext &Ctx = Plan.getContext();
     auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
 
     bool MadeChange = false;
@@ -2510,8 +2519,8 @@ void VPlanTransforms::createInterleaveGroups(
           DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
              IG->getIndex(IRInsertPos),
          /*IsSigned=*/true);
-      VPValue *OffsetVPV = Plan.getOrAddLiveIn(
-          ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
+      VPValue *OffsetVPV =
+          Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
       VPBuilder B(InsertPos);
       Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
                       : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
@@ -2837,7 +2846,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
         VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
         "first.active.lane");
     IncomingFromEarlyExit = EarlyExitB.createNaryOp(
-        Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
+        VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit},
        nullptr, "early.exit.value");
     ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
   }
@@ -3368,7 +3377,7 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
   if (VF.isScalable() && VScaleForTuning.has_value())
     VectorStep *= *VScaleForTuning;
   assert(VectorStep > 0 && "trip count should not be zero");
-  MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+  MDBuilder MDB(Plan.getContext());
   MDNode *BranchWeights =
       MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false);
   MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index b89cd21..871e37e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -363,6 +363,13 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
       continue;
     }
     VPValue *Op0;
+    if (match(&R, m_VPInstruction<VPInstruction::ExtractLane>(
+                      m_VPValue(Op0), m_VPValue(Op1)))) {
+      addUniformForAllParts(cast<VPInstruction>(&R));
+      for (unsigned Part = 1; Part != UF; ++Part)
+        R.addOperand(getValueForPart(Op1, Part));
+      continue;
+    }
     if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
                       m_VPValue(Op0))) ||
         match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
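
A note on the OMPIRBuilder.cpp changes above: the patch swaps the RAII IRBuilder<>::InsertPointGuard for explicit saveIP()/restoreIP() pairs and drops the SetCurrentDebugLocation(llvm::DebugLoc()) calls. The guard saves and restores both the insertion point and the builder's current debug location, while the explicit round-trip touches only the insertion point. A minimal sketch of the two idioms (not from the patch; buildHelper and Entry are illustrative names):

    #include "llvm/IR/IRBuilder.h"

    void buildHelper(llvm::IRBuilder<> &Builder, llvm::BasicBlock *Entry) {
      {
        // RAII: on scope exit, restores the insertion point *and* the
        // builder's current debug location.
        llvm::IRBuilder<>::InsertPointGuard IPG(Builder);
        Builder.SetInsertPoint(Entry);
        // ... emit code into Entry ...
      }

      // Explicit round-trip: only the insertion point is saved and restored;
      // the current debug location is left untouched.
      llvm::IRBuilder<>::InsertPoint Saved = Builder.saveIP();
      Builder.SetInsertPoint(Entry);
      // ... emit code into Entry ...
      Builder.restoreIP(Saved);
    }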
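On the IRSymtab.cpp change: a DenseSet<StringRef> stores only pointer/length pairs, so every inserted name must outlive the set, while StringSet<> copies the key bytes into its own allocator and owns them — presumably the safer choice now that the preserved-symbol set is also populated from RTLIB::RuntimeLibcallsInfo, whose names need not have static storage. A minimal sketch of the difference (not from the patch; the symbol name is just an example):

    #include "llvm/ADT/StringSet.h"
    #include <string>

    llvm::StringSet<> makeOwningSet() {
      llvm::StringSet<> Set;
      {
        std::string Temp = "__stack_chk_guard"; // storage local to this scope
        Set.insert(Temp); // StringSet copies the bytes into its allocator
        // A DenseSet<StringRef> would only have kept {pointer, length},
        // dangling once Temp is destroyed at the end of this scope.
      }
      return Set; // still valid: the set owns its keys
    }

    // e.g. makeOwningSet().contains("__stack_chk_guard") == true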
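On the MipsAsmPrinter.cpp change: subtarget feature strings are comma-separated "+feature" tokens, so "+soft-float" needs a leading comma only when the string already has content. A standalone restatement of the append logic (hypothetical helper name):

    #include <string>

    std::string appendSoftFloat(std::string FS, bool UseSoftFloat) {
      if (UseSoftFloat)
        FS += FS.empty() ? "+soft-float" : ",+soft-float";
      return FS;
    }

    // appendSoftFloat("", true)          == "+soft-float"
    // appendSoftFloat("+mips32r2", true) == "+mips32r2,+soft-float"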
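On the new VPInstruction::ExtractLane opcode (VPlan.h, VPlanRecipes.cpp, VPlanUnroll.cpp): operand 0 is a scalar lane index into the logical concatenation of the remaining vector operands, one per unrolled part, each VF lanes wide. A scalar model of that semantics — my reading of the recipe, not code from the patch; generate() selects between per-part extracts with icmp/select chains rather than dividing:

    #include <cstdint>
    #include <vector>

    // Parts holds one VF-wide vector per unrolled part, in part order.
    int64_t extractLane(uint64_t Lane, uint64_t VF,
                        const std::vector<std::vector<int64_t>> &Parts) {
      uint64_t Part = Lane / VF; // which per-part operand holds the lane
      uint64_t Idx = Lane % VF;  // position within that operand
      return Parts[Part][Idx];
    }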