Diffstat (limited to 'llvm/lib')
77 files changed, 1042 insertions, 399 deletions
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index f90717d..1d1a5560 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -61,6 +61,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy( static cl::opt<std::string> ModelSelector("ml-inliner-model-selector", cl::Hidden, cl::init("")); +static cl::opt<bool> StopImmediatelyForTest("ml-inliner-stop-immediately", + cl::Hidden); + #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL) // codegen-ed file #include "InlinerSizeModel.h" // NOLINT @@ -214,6 +217,7 @@ MLInlineAdvisor::MLInlineAdvisor( return; } ModelRunner->switchContext(""); + ForceStop = StopImmediatelyForTest; } unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const { @@ -379,9 +383,17 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) { - if (!PSI.isFunctionEntryCold(&Caller)) - return std::make_unique<InlineAdvice>(this, CB, ORE, - GetDefaultAdvice(CB)); + if (!PSI.isFunctionEntryCold(&Caller)) { + // Return an MLInlineAdvice, despite delegating to the default advice, + // because we need to keep track of the internal state. This is different + // from the other instances where we return a "default" InlineAdvice, + // which happen at points we won't come back to the MLAdvisor for + // decisions requiring that state. + return ForceStop ? std::make_unique<InlineAdvice>(this, CB, ORE, + GetDefaultAdvice(CB)) + : std::make_unique<MLInlineAdvice>(this, CB, ORE, + GetDefaultAdvice(CB)); + } } auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); // If this is a "never inline" case, there won't be any changes to internal
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index 1f751ee..61d4935 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -1,10 +1,14 @@ #include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" +#define DEBUG_TYPE "static-data-profile-info" + using namespace llvm; namespace llvm { @@ -60,6 +64,47 @@ void StaticDataProfileInfo::addConstantProfileCount( OriginalCount = getInstrMaxCountValue(); } +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getConstantHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { + // The accumulated counter shows the constant is hot. Return enum 'hot' + // whether this variable is seen by unprofiled functions or not. + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; + // The constant is not hot, and is seen by unprofiled functions. We don't want to + // assign it to unlikely sections, even if the counter says 'cold'. So return + // enum 'LukewarmOrUnknown'. + if (ConstantWithoutCounts.count(C)) + return StaticDataHotness::LukewarmOrUnknown; + // The accumulated counter shows the constant is cold, so return enum 'cold'. 
+ if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; + + return StaticDataHotness::LukewarmOrUnknown; +} + +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getSectionHotnessUsingDataAccessProfile( + std::optional<StringRef> MaybeSectionPrefix) const { + if (!MaybeSectionPrefix) + return StaticDataHotness::LukewarmOrUnknown; + StringRef Prefix = *MaybeSectionPrefix; + assert((Prefix == "hot" || Prefix == "unlikely") && + "Expect section_prefix to be one of hot or unlikely"); + return Prefix == "hot" ? StaticDataHotness::Hot : StaticDataHotness::Cold; +} + +StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataHotness::Cold: + return "unlikely"; + case StaticDataHotness::Hot: + return "hot"; + default: + return ""; + } +} + std::optional<uint64_t> StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { auto I = ConstantProfileCounts.find(C); @@ -70,27 +115,67 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { StringRef StaticDataProfileInfo::getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); + std::optional<uint64_t> Count = getConstantProfileCount(C); + +#ifndef NDEBUG + auto DbgPrintPrefix = [](StringRef Prefix) { + return Prefix.empty() ? "<empty>" : Prefix; + }; +#endif + + if (EnableDataAccessProf) { + // Module flag `HasDataAccessProf` is 1 -> empty section prefix means + // unknown hotness except for string literals. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C); + GV && llvm::memprof::IsAnnotationOK(*GV) && + !GV->getName().starts_with(".str")) { + auto HotnessFromDataAccessProf = + getSectionHotnessUsingDataAccessProfile(GV->getSectionPrefix()); + + if (!Count) { + StringRef Prefix = hotnessToStr(HotnessFromDataAccessProf); + LLVM_DEBUG(dbgs() << GV->getName() << " has section prefix " + << DbgPrintPrefix(Prefix) + << ", solely from data access profiles\n"); + return Prefix; + } + + // Both data access profiles and PGO counters are available. Use the + // hotter one. + auto HotnessFromPGO = getConstantHotnessUsingProfileCount(C, PSI, *Count); + StaticDataHotness GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown; + if (HotnessFromDataAccessProf == StaticDataHotness::Hot || + HotnessFromPGO == StaticDataHotness::Hot) { + GlobalVarHotness = StaticDataHotness::Hot; + } else if (HotnessFromDataAccessProf == + StaticDataHotness::LukewarmOrUnknown || + HotnessFromPGO == StaticDataHotness::LukewarmOrUnknown) { + GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown; + } else { + GlobalVarHotness = StaticDataHotness::Cold; + } + StringRef Prefix = hotnessToStr(GlobalVarHotness); + LLVM_DEBUG( + dbgs() << GV->getName() << " has section prefix " + << DbgPrintPrefix(Prefix) + << ", the max from data access profiles as " + << DbgPrintPrefix(hotnessToStr(HotnessFromDataAccessProf)) + << " and PGO counters as " + << DbgPrintPrefix(hotnessToStr(HotnessFromPGO)) << "\n"); + return Prefix; + } + } if (!Count) return ""; - // The accummulated counter shows the constant is hot. Return 'hot' whether - // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; - // The constant is not hot, and seen by unprofiled functions. We don't want to - // assign it to unlikely sections, even if the counter says 'cold'. So return - // an empty prefix before checking whether the counter is cold. 
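The "use the hotter one" merge implemented above in getConstantSectionPrefix can be restated compactly. A minimal sketch, assuming the enum values are ordered Cold < LukewarmOrUnknown < Hot (the names mirror the patch, but this standalone helper is illustrative, not the LLVM API):

#include <algorithm>

enum class StaticDataHotness { Cold, LukewarmOrUnknown, Hot };

// Hot wins over everything; an unknown classification prevents demotion
// to an unlikely (cold) section; only cold+cold stays cold.
static StaticDataHotness mergeHotness(StaticDataHotness FromDataAccessProf,
                                      StaticDataHotness FromPGO) {
  return std::max(FromDataAccessProf, FromPGO);
}

With that ordering, std::max computes exactly what the three-way if/else chain in the patch computes.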
- if (ConstantWithoutCounts.count(C)) - return ""; - // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) - return "unlikely"; - // The counter says lukewarm. Return an empty prefix. - return ""; + return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { - Info.reset(new StaticDataProfileInfo()); + bool EnableDataAccessProf = false; + if (auto *MD = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("EnableDataAccessProf"))) + EnableDataAccessProf = MD->getZExtValue(); + Info.reset(new StaticDataProfileInfo(EnableDataAccessProf)); return false; } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 380b192..cf63285 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -329,10 +329,6 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { for (const auto &[Name, Info] : make_early_inc_range(ForwardRefVals)) { if (StringRef(Name).starts_with("llvm.")) { Intrinsic::ID IID = Intrinsic::lookupIntrinsicID(Name); - if (IID == Intrinsic::not_intrinsic) - // Don't do anything for unknown intrinsics. - continue; - // Automatically create declarations for intrinsics. Intrinsics can only // be called directly, so the call function type directly determines the // declaration function type. @@ -346,11 +342,26 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { return error(Info.second, "intrinsic can only be used as callee"); SmallVector<Type *> OverloadTys; - if (!Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(), - OverloadTys)) - return error(Info.second, "invalid intrinsic signature"); - - U.set(Intrinsic::getOrInsertDeclaration(M, IID, OverloadTys)); + if (IID != Intrinsic::not_intrinsic && + Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(), + OverloadTys)) { + U.set(Intrinsic::getOrInsertDeclaration(M, IID, OverloadTys)); + } else { + // Try to upgrade the intrinsic. 
+ Function *TmpF = Function::Create(CB->getFunctionType(), + Function::ExternalLinkage, Name, M); + Function *NewF = nullptr; + if (!UpgradeIntrinsicFunction(TmpF, NewF)) { + if (IID == Intrinsic::not_intrinsic) + return error(Info.second, "unknown intrinsic '" + Name + "'"); + return error(Info.second, "invalid intrinsic signature"); + } + + U.set(TmpF); + UpgradeIntrinsicCall(CB, NewF); + if (TmpF->use_empty()) + TmpF->eraseFromParent(); + } } Info.first->eraseFromParent(); @@ -1259,7 +1270,7 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, unsigned NameID, if (parseToken(lltok::StringConstant, "expected partition string")) return true; } else if (!IsAlias && Lex.getKind() == lltok::MetadataVar) { - if (parseGlobalObjectMetadataAttachment(*GI.get())) + if (parseGlobalObjectMetadataAttachment(*GI)) return true; } else { return tokError("unknown alias or ifunc property!"); @@ -5865,6 +5876,7 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { REQUIRED(file, MDField, (/* AllowNull */ false)); \ OPTIONAL(language, DwarfLangField, ); \ OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, ); \ + OPTIONAL(sourceLanguageVersion, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(producer, MDStringField, ); \ OPTIONAL(isOptimized, MDBoolField, ); \ OPTIONAL(flags, MDStringField, ); \ @@ -5894,10 +5906,15 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { return error(Loc, "can only specify one of 'language' and " "'sourceLanguageName' on !DICompileUnit"); + if (sourceLanguageVersion.Seen && !sourceLanguageName.Seen) + return error(Loc, "'sourceLanguageVersion' requires an associated " + "'sourceLanguageName' on !DICompileUnit"); + Result = DICompileUnit::getDistinct( Context, language.Seen ? DISourceLanguageName(language.Val) - : DISourceLanguageName(sourceLanguageName.Val, 0), + : DISourceLanguageName(sourceLanguageName.Val, + sourceLanguageVersion.Val), file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val, diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp index e0a4471..19d5b98 100644 --- a/llvm/lib/BinaryFormat/XCOFF.cpp +++ b/llvm/lib/BinaryFormat/XCOFF.cpp @@ -112,26 +112,26 @@ StringRef XCOFF::getNameForTracebackTableLanguageId( XCOFF::CFileCpuId XCOFF::getCpuID(StringRef CPUName) { StringRef CPU = PPC::normalizeCPUName(CPUName); return StringSwitch<XCOFF::CFileCpuId>(CPU) - .Cases("generic", "COM", XCOFF::TCPU_COM) + .Cases({"generic", "COM"}, XCOFF::TCPU_COM) .Case("601", XCOFF::TCPU_601) - .Cases("602", "603", "603e", "603ev", XCOFF::TCPU_603) - .Cases("604", "604e", XCOFF::TCPU_604) + .Cases({"602", "603", "603e", "603ev"}, XCOFF::TCPU_603) + .Cases({"604", "604e"}, XCOFF::TCPU_604) .Case("620", XCOFF::TCPU_620) .Case("970", XCOFF::TCPU_970) - .Cases("a2", "g3", "g4", "g5", "e500", XCOFF::TCPU_COM) - .Cases("pwr3", "pwr4", XCOFF::TCPU_COM) - .Cases("pwr5", "PWR5", XCOFF::TCPU_PWR5) - .Cases("pwr5x", "PWR5X", XCOFF::TCPU_PWR5X) - .Cases("pwr6", "PWR6", XCOFF::TCPU_PWR6) - .Cases("pwr6x", "PWR6E", XCOFF::TCPU_PWR6E) - .Cases("pwr7", "PWR7", XCOFF::TCPU_PWR7) - .Cases("pwr8", "PWR8", XCOFF::TCPU_PWR8) - .Cases("pwr9", "PWR9", XCOFF::TCPU_PWR9) - .Cases("pwr10", "PWR10", XCOFF::TCPU_PWR10) - .Cases("ppc", "PPC", "ppc32", "ppc64", XCOFF::TCPU_COM) + .Cases({"a2", "g3", "g4", "g5", "e500"}, XCOFF::TCPU_COM) + .Cases({"pwr3", "pwr4"}, XCOFF::TCPU_COM) + 
.Cases({"pwr5", "PWR5"}, XCOFF::TCPU_PWR5) + .Cases({"pwr5x", "PWR5X"}, XCOFF::TCPU_PWR5X) + .Cases({"pwr6", "PWR6"}, XCOFF::TCPU_PWR6) + .Cases({"pwr6x", "PWR6E"}, XCOFF::TCPU_PWR6E) + .Cases({"pwr7", "PWR7"}, XCOFF::TCPU_PWR7) + .Cases({"pwr8", "PWR8"}, XCOFF::TCPU_PWR8) + .Cases({"pwr9", "PWR9"}, XCOFF::TCPU_PWR9) + .Cases({"pwr10", "PWR10"}, XCOFF::TCPU_PWR10) + .Cases({"ppc", "PPC", "ppc32", "ppc64"}, XCOFF::TCPU_COM) .Case("ppc64le", XCOFF::TCPU_PWR8) .Case("future", XCOFF::TCPU_PWR10) - .Cases("any", "ANY", XCOFF::TCPU_ANY) + .Cases({"any", "ANY"}, XCOFF::TCPU_ANY) .Default(XCOFF::TCPU_INVALID); } diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index cdcf7a8..ed0443f 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1860,7 +1860,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_COMPILE_UNIT: { - if (Record.size() < 14 || Record.size() > 22) + if (Record.size() < 14 || Record.size() > 23) return error("Invalid record"); // Ignore Record[0], which indicates whether this compile unit is @@ -1869,11 +1869,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( const auto LangVersionMask = (uint64_t(1) << 63); const bool HasVersionedLanguage = Record[1] & LangVersionMask; + const uint32_t LanguageVersion = Record.size() > 22 ? Record[22] : 0; auto *CU = DICompileUnit::getDistinct( Context, HasVersionedLanguage - ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0) + ? DISourceLanguageName(Record[1] & ~LangVersionMask, + LanguageVersion) : DISourceLanguageName(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8], diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 54e916e..8ff3aa9 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2142,6 +2142,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, Record.push_back(N->getRangesBaseAddress()); Record.push_back(VE.getMetadataOrNullID(N->getRawSysRoot())); Record.push_back(VE.getMetadataOrNullID(N->getRawSDK())); + Record.push_back(Lang.hasVersionedName() ? 
Lang.getVersion() : 0); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp index 323b21e..4e6f93e 100644 --- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -1102,8 +1102,6 @@ void TrieRawHashMapHandle::print( if (auto Err = Printer.printRecords()) OS << "error: " << toString(std::move(Err)) << "\n"; - - return; } Error TrieRawHashMapHandle::validate( diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 05fffe9..e2af0c5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -119,6 +119,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/VCSRevision.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -476,6 +477,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { } bool AsmPrinter::doInitialization(Module &M) { + VFS = vfs::getRealFileSystem(); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); MMI = MMIWP ? &MMIWP->getMMI() : nullptr; HasSplitStack = false; @@ -1683,7 +1685,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) { return nullptr; } -/// Emits .callgraph section. +/// Emits .llvm.callgraph section. void AsmPrinter::emitCallGraphSection(const MachineFunction &MF, FunctionCallGraphInfo &FuncCGInfo) { if (!MF.getTarget().Options.EmitCallGraphSection) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index c364ffc..8dd8b9da 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -98,6 +99,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode); SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager(); SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); + SrcMgr.setVirtualFileSystem(VFS); std::unique_ptr<MCAsmParser> Parser( createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 2d50167..fae952e 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -491,6 +491,20 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { return true; } if (FBB) { + // If we get here with a MBB which ends like this: + // + // bb.1: + // successors: %bb.2; + // ... + // BNE $x1, $x0, %bb.2 + // PseudoBR %bb.2 + // + // Just remove conditional branch. + if (TBB == FBB) { + removeBranch(MBB); + insertUncondBranch(MBB, TBB); + return true; + } // We need to split the basic block here to obtain two long-range // unconditional branches. 
NewBB = createNewBlockAfter(*MBB); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 4320b1d..9e78ec9 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -819,7 +819,7 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { } // Verify BFI has been updated correctly by recomputing BFI and comparing them. -void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { +[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) { DominatorTree NewDT(F); LoopInfo NewLI(NewDT); BranchProbabilityInfo NewBPI(F, NewLI, TLInfo); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index cffaf7c..38ec83f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3292,8 +3292,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); - // TODO: Probably should be zext - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); Observer.changedInstr(MI); return Legalized; } @@ -3325,8 +3324,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx == 2) { Observer.changingInstr(MI); - // TODO: Probably should be zext - widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT); Observer.changedInstr(MI); return Legalized; } diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index e359831..ea08365 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -1257,7 +1257,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( Tracker.clear(); } -static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain( +[[maybe_unused]] static void printSpillReloadChain( DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain, DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain, MachineInstr *Leader) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 358e060..c97300d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17759,7 +17759,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -17825,7 +17824,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // If nnan is enabled, fold lots of things. 
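The visitFADD hunk below, and the matching visitFSUB/visitFMA hunks after it, all make the same semantic change: the no-NaNs folds are now gated only on the instruction-level nnan fast-math flag, not on the global NoNaNsFPMath TargetOptions setting. A minimal sketch of the new gate (the helper name is illustrative, not part of the patch):

#include "llvm/CodeGen/SelectionDAGNodes.h"

// The fold now keys on the node's own flags, e.g. set from IR such as
// "%r = fadd nnan float %x, %y"; a module-wide option no longer enables it.
static bool foldAssumesNoNaNs(const llvm::SDNode *N) {
  // Previously: Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()
  return N->getFlags().hasNoNaNs();
}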
- if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) { + if (Flags.hasNoNaNs() && AllowNewConst) { // If allowed, fold (fadd (fneg x), x) -> 0.0 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); @@ -17974,7 +17973,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -18002,7 +18000,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0 == N1) { // (fsub x, x) -> 0.0 - if (Options.NoNaNsFPMath || Flags.hasNoNaNs()) + if (Flags.hasNoNaNs()) return DAG.getConstantFP(0.0f, DL, VT); } @@ -18313,7 +18311,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; // FMA nodes have flags that propagate to the created nodes. SelectionDAG::FlagInserter FlagsInserter(DAG, N); MatchContextClass matcher(DAG, TLI, N); @@ -18339,8 +18336,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); } - if ((Options.NoNaNsFPMath && N->getFlags().hasNoInfs()) || - (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) { + if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) { if (N->getFlags().hasNoSignedZeros() || (N2CFP && !N2CFP->isExactlyValue(-0.0))) { if (N0CFP && N0CFP->isZero()) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 6610eef..c61f757 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -181,8 +181,8 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch, DWARFDebugFrame::~DWARFDebugFrame() = default; -static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, - uint64_t Offset, int Length) { +[[maybe_unused]] static void dumpDataAux(DataExtractor Data, uint64_t Offset, + int Length) { errs() << "DUMP: "; for (int i = 0; i < Length; ++i) { uint8_t c = Data.getU8(&Offset); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 2430d98..3908a78 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2374,16 +2374,21 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N, Out << "!DICompileUnit("; MDFieldPrinter Printer(Out, WriterCtx); - auto Lang = N->getSourceLanguage(); - if (Lang.hasVersionedName()) + DISourceLanguageName Lang = N->getSourceLanguage(); + + if (Lang.hasVersionedName()) { Printer.printDwarfEnum( "sourceLanguageName", static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()), dwarf::SourceLanguageNameString, /* ShouldSkipZero */ false); - else + + Printer.printInt("sourceLanguageVersion", Lang.getVersion(), + /*ShouldSkipZero=*/true); + } else { Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString, /* ShouldSkipZero */ false); + } Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false); Printer.printString("producer", N->getProducer()); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index a755c22..aee3c3b 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -553,7 +553,8 @@ void 
MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { SFrameSection = Ctx->getELFSection(".sframe", ELF::SHT_GNU_SFRAME, ELF::SHF_ALLOC); - CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0); + CallGraphSection = + Ctx->getELFSection(".llvm.callgraph", ELF::SHT_PROGBITS, 0); StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0); @@ -1171,8 +1172,8 @@ MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const { } return Ctx->getELFSection( - ".callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, true, - ElfSec.getUniqueID(), + ".llvm.callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, + /*IsComdat=*/true, ElfSec.getUniqueID(), static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol())); } diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 3c8e44a..0208735 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -302,7 +302,7 @@ void ProfOStream::patch(ArrayRef<PatchItem> P) { std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName, - uint64_t Version LLVM_ATTRIBUTE_UNUSED) { + [[maybe_unused]] uint64_t Version) { // Value names may be prefixed with a binary '1' to indicate // that the backend should not modify the symbols due to any platform // naming convention. Do not include that '1' in the PGO profile name. diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index 82b0e6a..eff9947 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -141,7 +141,7 @@ extern "C" const char *__crashreporter_info__ asm(".desc ___crashreporter_info__, 0x10"); #endif -static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] static void setCrashLogMessage(const char *msg); static void setCrashLogMessage(const char *msg) { #ifdef HAVE_CRASHREPORTERCLIENT_H (void)CRSetCrashLogMessage(msg); diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index a43cf37a..f2bbaab 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -38,6 +39,22 @@ using namespace llvm; static const size_t TabStop = 8; +// Out of line to avoid needing definition of vfs::FileSystem in header. 
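The comment above points at a C++ idiom worth spelling out: IntrusiveRefCntPtr<T> touches T's ref-count from its special members, so a class can hold one with only a forward declaration of T in the header, provided its constructors, destructor, and assignments are defined out of line where T is complete, which is exactly what the defaulted definitions just below do. A minimal sketch of the idiom (the Tool class is hypothetical):

#include "llvm/ADT/IntrusiveRefCntPtr.h"

namespace llvm { namespace vfs { class FileSystem; } } // forward declaration only

class Tool {
  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;

public:
  Tool();  // '= default' in the .cpp after including VirtualFileSystem.h,
  ~Tool(); // so the ref-count calls are instantiated with a complete type.
};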
+SourceMgr::SourceMgr() = default; +SourceMgr::SourceMgr(IntrusiveRefCntPtr<vfs::FileSystem> FS) + : FS(std::move(FS)) {} +SourceMgr::SourceMgr(SourceMgr &&) = default; +SourceMgr &SourceMgr::operator=(SourceMgr &&) = default; +SourceMgr::~SourceMgr() = default; + +IntrusiveRefCntPtr<vfs::FileSystem> SourceMgr::getVirtualFileSystem() const { + return FS; +} + +void SourceMgr::setVirtualFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS) { + this->FS = std::move(FS); +} + unsigned SourceMgr::AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile) { @@ -52,8 +69,11 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, ErrorOr<std::unique_ptr<MemoryBuffer>> SourceMgr::OpenIncludeFile(const std::string &Filename, std::string &IncludedFile) { + if (!FS) + reportFatalInternalError("Opening include file from SourceMgr without VFS"); + ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = - MemoryBuffer::getFile(Filename); + FS->getBufferForFile(Filename); SmallString<64> Buffer(Filename); // If the file didn't exist directly, see if it's in an include path. @@ -61,7 +81,7 @@ SourceMgr::OpenIncludeFile(const std::string &Filename, ++i) { Buffer = IncludeDirectories[i]; sys::path::append(Buffer, Filename); - NewBufOrErr = MemoryBuffer::getFile(Buffer); + NewBufOrErr = FS->getBufferForFile(Buffer); } if (NewBufOrErr) diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 42043f7..b1024a8 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -128,6 +129,7 @@ int llvm::TableGenMain(const char *argv0, // Record the location of the include directory so that the lexer can find // it later. 
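The two calls just below are the pattern any SourceMgr client now follows: register include directories, then attach a vfs::FileSystem to resolve them. As a usage sketch, includes can also be served from memory; the setup function here is illustrative, while the SourceMgr and vfs calls are the ones this patch adds or already provides:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/VirtualFileSystem.h"

// Resolve includes against an in-memory tree instead of the real disk.
static void setupHermeticSourceMgr(llvm::SourceMgr &SM) {
  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
  FS->addFile("/virt/inc/common.td", /*ModificationTime=*/0,
              llvm::MemoryBuffer::getMemBuffer("// shared defs\n"));
  SM.setIncludeDirs({"/virt/inc"});
  SM.setVirtualFileSystem(std::move(FS));
}

Without an attached VFS, the patched OpenIncludeFile() reports a fatal internal error, so former implicit users of the real filesystem now opt in explicitly, as TableGenMain does here with vfs::getRealFileSystem().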
SrcMgr.setIncludeDirs(IncludeDirs); + SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem()); TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs); diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp index 2c3726a..db45054 100644 --- a/llvm/lib/TableGen/Parser.cpp +++ b/llvm/lib/TableGen/Parser.cpp @@ -9,6 +9,7 @@ #include "llvm/TableGen/Parser.h" #include "TGParser.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/TableGen/Record.h" using namespace llvm; @@ -21,6 +22,7 @@ bool llvm::TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records) { SrcMgr = SourceMgr(); SrcMgr.takeSourceBuffersFrom(InputSrcMgr); SrcMgr.setIncludeDirs(InputSrcMgr.getIncludeDirs()); + SrcMgr.setVirtualFileSystem(InputSrcMgr.getVirtualFileSystem()); SrcMgr.setDiagHandler(InputSrcMgr.getDiagHandler(), InputSrcMgr.getDiagContext()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9926a4d..be2f2e4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16254,7 +16254,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { SplatVal > 1) { SDValue Pg = getPredicateForScalableVector(DAG, DL, VT); SDValue Res = - DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0), + DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0), DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32)); if (Negated) Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res); @@ -22942,7 +22942,7 @@ static SDValue performIntrinsicCombine(SDNode *N, return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_sve_asrd: - return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_cmphs: if (!N->getOperand(2).getValueType().isFloatingPoint()) @@ -30047,7 +30047,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT); SDValue Res = - DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2); + DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2); if (Negated) Res = DAG.getNode(ISD::SUB, DL, ContainerVT, DAG.getConstant(0, DL, ContainerVT), Res); diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 7322212..fe84193 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_USDOT : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; +} + // Generic instruction for the BSP pseudo. It is expanded into BSP, which // expands into BSL/BIT/BIF after register allocation. 
def G_BSP : AArch64GenericInstruction { @@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>; def : GINodeEquiv<G_UDOT, AArch64udot>; def : GINodeEquiv<G_SDOT, AArch64sdot>; +def : GINodeEquiv<G_USDOT, AArch64usdot>; def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index b3c9656..343fd81 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -40,7 +40,11 @@ yaml::AArch64FunctionInfo::AArch64FunctionInfo( getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)), HasStackFrame(MFI.hasStackFrame() ? std::optional<bool>(MFI.hasStackFrame()) - : std::nullopt) {} + : std::nullopt), + HasStreamingModeChanges( + MFI.hasStreamingModeChanges() + ? std::optional<bool>(MFI.hasStreamingModeChanges()) + : std::nullopt) {} void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this); @@ -55,6 +59,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields( YamlMFI.StackSizePPR.value_or(0)); if (YamlMFI.HasStackFrame) setHasStackFrame(*YamlMFI.HasStackFrame); + if (YamlMFI.HasStreamingModeChanges) + setHasStreamingModeChanges(*YamlMFI.HasStreamingModeChanges); } static std::pair<bool, bool> GetSignReturnAddress(const Function &F) { diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index bd0a17d..d1832f4 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -645,6 +645,7 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo { std::optional<uint64_t> StackSizeZPR; std::optional<uint64_t> StackSizePPR; std::optional<bool> HasStackFrame; + std::optional<bool> HasStreamingModeChanges; AArch64FunctionInfo() = default; AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI); @@ -659,6 +660,7 @@ template <> struct MappingTraits<AArch64FunctionInfo> { YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR); YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR); YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame); + YamlIO.mapOptional("hasStreamingModeChanges", MFI.HasStreamingModeChanges); } }; diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp index cdf2822..a90950d 100644 --- a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp @@ -75,6 +75,10 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) { if (Src != Dst) MRI->replaceRegWith(Dst, Src); + if (MI.getOperand(1).isUndef()) + for (MachineOperand &MO : MRI->use_operands(Dst)) + MO.setIsUndef(); + // MI must be erased from the basic block before recalculating the live // interval. 
LIS->RemoveMachineInstrFromMaps(MI); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index bc6b931..98a128e 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -265,7 +265,7 @@ def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> ]>; -def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>; +def AArch64asrd_m1 : SDNode<"AArch64ISD::ASRD_MERGE_OP1", SDT_AArch64Arith_Imm>; def AArch64urshri_p_node : SDNode<"AArch64ISD::URSHR_I_PRED", SDT_AArch64Arith_Imm>; def AArch64urshri_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 9e2d698..05a4313 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerTriOp(AArch64::G_UDOT); case Intrinsic::aarch64_neon_sdot: return LowerTriOp(AArch64::G_SDOT); + case Intrinsic::aarch64_neon_usdot: + return LowerTriOp(AArch64::G_USDOT); case Intrinsic::aarch64_neon_sqxtn: return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S); case Intrinsic::aarch64_neon_sqxtun: diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index 4749748..434ea67 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass { MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs); + /// Attempts to find an insertion point before \p Inst where the status flags + /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of + /// the block is found. + std::pair<MachineBasicBlock::iterator, LiveRegs> + findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl<InstInfo>::const_iterator Inst); void emitStateChange(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ZAState From, ZAState To, LiveRegs PhysLiveRegs); @@ -337,6 +343,28 @@ private: MachineRegisterInfo *MRI = nullptr; }; +static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) { + LiveRegs PhysLiveRegs = LiveRegs::None; + if (!LiveUnits.available(AArch64::NZCV)) + PhysLiveRegs |= LiveRegs::NZCV; + // We have to track W0 and X0 separately as otherwise things can get + // confused if we attempt to preserve X0 but only W0 was defined. 
+ if (!LiveUnits.available(AArch64::W0)) + PhysLiveRegs |= LiveRegs::W0; + if (!LiveUnits.available(AArch64::W0_HI)) + PhysLiveRegs |= LiveRegs::W0_HI; + return PhysLiveRegs; +} + +static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) { + if (PhysLiveRegs & LiveRegs::NZCV) + LiveUnits.addReg(AArch64::NZCV); + if (PhysLiveRegs & LiveRegs::W0) + LiveUnits.addReg(AArch64::W0); + if (PhysLiveRegs & LiveRegs::W0_HI) + LiveUnits.addReg(AArch64::W0_HI); +} + FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && @@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { LiveRegUnits LiveUnits(*TRI); LiveUnits.addLiveOuts(MBB); - auto GetPhysLiveRegs = [&] { - LiveRegs PhysLiveRegs = LiveRegs::None; - if (!LiveUnits.available(AArch64::NZCV)) - PhysLiveRegs |= LiveRegs::NZCV; - // We have to track W0 and X0 separately as otherwise things can get - // confused if we attempt to preserve X0 but only W0 was defined. - if (!LiveUnits.available(AArch64::W0)) - PhysLiveRegs |= LiveRegs::W0; - if (!LiveUnits.available(AArch64::W0_HI)) - PhysLiveRegs |= LiveRegs::W0_HI; - return PhysLiveRegs; - }; - - Block.PhysLiveRegsAtExit = GetPhysLiveRegs(); + Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits); auto FirstTerminatorInsertPt = MBB.getFirstTerminator(); auto FirstNonPhiInsertPt = MBB.getFirstNonPHI(); for (MachineInstr &MI : reverse(MBB)) { MachineBasicBlock::iterator MBBI(MI); LiveUnits.stepBackward(MI); - LiveRegs PhysLiveRegs = GetPhysLiveRegs(); + LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits); // The SMEStateAllocPseudo marker is added to a function if the save // buffer was allocated in SelectionDAG. It marks the end of the // allocation -- which is a safe point for this pass to insert any TPIDR2 @@ -476,6 +491,49 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, return BundleStates; } +std::pair<MachineBasicBlock::iterator, LiveRegs> +MachineSMEABI::findStateChangeInsertionPoint( + MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl<InstInfo>::const_iterator Inst) { + LiveRegs PhysLiveRegs; + MachineBasicBlock::iterator InsertPt; + if (Inst != Block.Insts.end()) { + InsertPt = Inst->InsertPt; + PhysLiveRegs = Inst->PhysLiveRegs; + } else { + InsertPt = MBB.getFirstTerminator(); + PhysLiveRegs = Block.PhysLiveRegsAtExit; + } + + if (!(PhysLiveRegs & LiveRegs::NZCV)) + return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags). + + // Find the previous state change. We can not move before this point. + MachineBasicBlock::iterator PrevStateChangeI; + if (Inst == Block.Insts.begin()) { + PrevStateChangeI = MBB.begin(); + } else { + // Note: `std::prev(Inst)` is the previous InstInfo. We only create an + // InstInfo object for instructions that require a specific ZA state, so the + // InstInfo is the site of the previous state change in the block (which can + // be several MIs earlier). + PrevStateChangeI = std::prev(Inst)->InsertPt; + } + + // Note: LiveUnits will only accurately track X0 and NZCV. + LiveRegUnits LiveUnits(*TRI); + setPhysLiveRegs(LiveUnits, PhysLiveRegs); + for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) { + // Don't move before/into a call (which may have a state change before it). 
+ if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall()) + break; + LiveUnits.stepBackward(*I); + if (LiveUnits.available(AArch64::NZCV)) + return {I, getPhysLiveRegs(LiveUnits)}; + } + return {InsertPt, PhysLiveRegs}; +} + void MachineSMEABI::insertStateChanges(EmitContext &Context, const FunctionInfo &FnInfo, const EdgeBundles &Bundles, @@ -490,10 +548,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, CurrentState = InState; for (auto &Inst : Block.Insts) { - if (CurrentState != Inst.NeededState) - emitStateChange(Context, MBB, Inst.InsertPt, CurrentState, - Inst.NeededState, Inst.PhysLiveRegs); - CurrentState = Inst.NeededState; + if (CurrentState != Inst.NeededState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, &Inst); + emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState, + PhysLiveRegs); + CurrentState = Inst.NeededState; + } } if (MBB.succ_empty()) @@ -501,9 +562,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, ZAState OutState = BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)]; - if (CurrentState != OutState) - emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState, - OutState, Block.PhysLiveRegsAtExit); + if (CurrentState != OutState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, Block.Insts.end()); + emitStateChange(Context, MBB, InsertPt, CurrentState, OutState, + PhysLiveRegs); + } } } diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index c684f9e..01a40c1 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -654,7 +654,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates( if (TotalCyclesBetweenCandidates < NumMFMACycles - 1) InstrsToUnpack.insert(&Instr); } - return; } void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { @@ -681,7 +680,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) { HiDstOp.setIsRenamable(DstOp.isRenamable()); I.eraseFromParent(); - return; } MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I, diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9945ecc..0d7b6d1 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -161,8 +161,8 @@ namespace { friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { return TE.PseudoOpc < PseudoOpc; } - friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, - const NEONLdStTableEntry &TE) { + [[maybe_unused]] friend bool operator<(unsigned PseudoOpc, + const NEONLdStTableEntry &TE) { return PseudoOpc < TE.PseudoOpc; } }; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 82c43ff..26a8728 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {} /// Returns the bit offset to backpatch with the location of the real VST. void DXILBitcodeWriter::writeModuleInfo() { // Emit various pieces of data attached to a module. 
- if (!M.getTargetTriple().empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, - M.getTargetTriple().str(), 0 /*TODO*/); - const std::string &DL = M.getDataLayoutStr(); - if (!DL.empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + + // We need to hardcode a triple and datalayout that's compatible with the + // historical DXIL triple and datalayout from DXC. + StringRef Triple = "dxil-ms-dx"; + StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-" + "f16:32-f32:32-f64:64-n8:16:32:64"; + writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + if (!M.getModuleInlineAsm().empty()) writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), 0 /*TODO*/); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp index 1eb03bf..725f2b1 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp @@ -149,11 +149,6 @@ public: std::string Data; llvm::raw_string_ostream OS(Data); - Triple OriginalTriple = M.getTargetTriple(); - // Set to DXIL triple when write to bitcode. - // Only the output bitcode need to be DXIL triple. - M.setTargetTriple(Triple("dxil-ms-dx")); - // Perform late legalization of lifetime intrinsics that would otherwise // fail the Module Verifier if performed in an earlier pass legalizeLifetimeIntrinsics(M); @@ -165,9 +160,6 @@ public: // not-so-legal legalizations removeLifetimeIntrinsics(M); - // Recover triple. - M.setTargetTriple(OriginalTriple); - Constant *ModuleConstant = ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data)); auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true, diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 52e6b0b..68f5312 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -174,8 +174,8 @@ namespace { const TargetRegisterInfo *TRI; }; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) - LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const PrintRegSet &P); raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { OS << '{'; for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index 14b6bb3..9087f9d 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -272,15 +272,14 @@ namespace { OS << *I << ' ' << **I << '\n'; } - raw_ostream &operator<< (raw_ostream &OS, - const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S); raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { dump_node_container(OS, S); return OS; } - raw_ostream &operator<< (raw_ostream &OS, - const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const NodeToUsesMap &M); raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ for (const auto &I : M) { const UseSet &Us = I.second; @@ -914,9 +913,8 @@ namespace { const NodeToValueMap ⤅ }; - raw_ostream &operator<< (raw_ostream &OS, - const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; - raw_ostream &operator<< (raw_ostream &OS, 
const LocationAsBlock &Loc) { + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const LocationAsBlock &Loc) { for (const auto &I : Loc.Map) { OS << I.first << " -> "; if (BasicBlock *B = cast_or_null<BasicBlock>(I.second))
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 14a7ae7..3900aac 100644 --- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -132,8 +132,7 @@ namespace { const TargetRegisterInfo &TRI; friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); }; - raw_ostream &operator<<(raw_ostream &OS, - const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; + [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P); raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { OS << "{ SplitB:" << PrintMB(P.FP.SplitB) << ", PredR:" << printReg(P.FP.PredR, &P.TRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index f9fdab4..9c81e96 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -51,11 +51,11 @@ private: const TargetRegisterInfo &TRI; }; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) - LLVM_ATTRIBUTE_UNUSED; - raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { - return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); - } +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, + const PrintRegister &PR); +raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) { + return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg); +} class HexagonGenPredicate : public MachineFunctionPass { public:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bfea50e..6b48a21 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) { return new HexagonTargetStreamer(S); } -static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) { +[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) { if (STI->hasFeature(F)) STI->ToggleFeature(F); } -static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) { +[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) { return STI->hasFeature(F); }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2..8bf0d11 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } return SDV; } + // Recognize build vector patterns to emit VSX vector instructions + // instead of loading the value from memory. + if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG)) + return VecPat; } // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N, return SDValue(); } +// The LXVKQ instruction loads a VSX vector with a special quadword value +// based on an immediate value. This helper method returns the details of the +// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount} +// to help generate the LXVKQ instruction and the subsequent shift instruction +// required to match the original build vector pattern. + +// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount} +using LXVKQPattern = std::tuple<uint32_t, uint8_t>; + +static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) { + + // LXVKQ instruction loads the Quadword value: + // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000 + static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64; + static const uint32_t Uim = 16; + + // Check for direct LXVKQ match (no shift needed) + if (FullVal == BasePattern) + return std::make_tuple(Uim, uint8_t{0}); + + // Check if FullVal is 1 (the result of the base pattern >> 127) + if (FullVal == APInt(128, 1)) + return std::make_tuple(Uim, uint8_t{127}); + + return std::nullopt; +} + +/// Combine vector loads to a single load (using lxvkq) or splat with shift of a +/// constant (xxspltib + vsrq) by recognizing patterns in the Build Vector. +/// The LXVKQ instruction loads a VSX vector with a special quadword value based +/// on an immediate value. If UIM=0b10000, then LXVKQ loads VSR[32×TX+T] with value +/// 0x8000_0000_0000_0000_0000_0000_0000_0000. +/// This can be used to inline the build vector constants that have the +/// following patterns: +/// +/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) +/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern is +/// loaded using a combination of splatting and right shift instructions. + +SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op, + SelectionDAG &DAG) const { + + assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) && + "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue"); + + // This transformation is only supported if we are loading either a byte, + // halfword, word, or doubleword. + EVT VT = Op.getValueType(); + if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 || + VT == MVT::v2i64)) + return SDValue(); + + LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector (" + << VT.getEVTString() << "): "; + Op->dump()); + + unsigned NumElems = VT.getVectorNumElements(); + unsigned ElemBits = VT.getScalarSizeInBits(); + + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // Check for non-constant operands in the build vector. + for (const SDValue &Operand : Op.getNode()->op_values()) { + if (!isa<ConstantSDNode>(Operand)) + return SDValue(); + } + + // Assemble build vector operands as a 128-bit register value. + // We need to reconstruct what the 128-bit register pattern would be + // that produces this vector when interpreted with the current endianness. + APInt FullVal = APInt::getZero(128); + + for (unsigned Index = 0; Index < NumElems; ++Index) { + auto *C = cast<ConstantSDNode>(Op.getOperand(Index)); + + // Get element value as raw bits (zero-extended) + uint64_t ElemValue = C->getZExtValue(); + + // Mask to element size to ensure we only get the relevant bits + if (ElemBits < 64) + ElemValue &= ((1ULL << ElemBits) - 1); + + // Calculate bit position for this element in the 128-bit register + unsigned BitPos = + (IsLittleEndian) ? 
(Index * ElemBits) : (128 - (Index + 1) * ElemBits); + + // Create APInt for the element value and shift it to correct position + APInt ElemAPInt(128, ElemValue); + ElemAPInt <<= BitPos; + + // Place the element value at the correct bit position + FullVal |= ElemAPInt; + } + + if (FullVal.isZero() || FullVal.isAllOnes()) + return SDValue(); + + if (auto UIMOpt = getPatternInfo(FullVal)) { + const auto &[Uim, ShiftAmount] = *UIMOpt; + SDLoc Dl(Op); + + // Generate LXVKQ instruction if the shift amount is zero. + if (ShiftAmount == 0) { + SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32); + SDValue LxvkqInstr = + SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0); + LLVM_DEBUG(llvm::dbgs() + << "combineBVLoadsSpecialValue: Instruction Emitted "; + LxvkqInstr.dump()); + return LxvkqInstr; + } + + assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value"); + + // The right shifted pattern can be constructed using a combination of + // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower + // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate + // value 255. + SDValue ShiftAmountVec = + SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32, + DAG.getTargetConstant(255, Dl, MVT::i32)), + 0); + // Generate appropriate right shift instruction + SDValue ShiftVec = SDValue( + DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec), + 0); + LLVM_DEBUG(llvm::dbgs() + << "\n combineBVLoadsSpecialValue: Instruction Emitted "; + ShiftVec.dump()); + return ShiftVec; + } + // No patterns matched for build vectors. + return SDValue(); +} + /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 59f3387..880aca7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1472,6 +1472,9 @@ namespace llvm { combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBVLoadsSpecialValue(SDValue Operand, + SelectionDAG &DAG) const; + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the VINSERTH instruction introduced in ISA 3.0. This is /// essentially any shuffle of v8i16 vectors that just inserts one element diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 2384959..2d8c633 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> { 126>; } +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C) +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C), +// NOT(B), NAND(B,C)} +// - C is the "false" case op NOR(B,C) +// ============================================================================= +multiclass XXEvalTernarySelectNor<ValueType Vt>{ + // Pattern: (A ? 
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv
+// This class matches the equivalent ternary operation A ? f(B,C) : EQV(B,C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NOT(C),
+//   NAND(B,C)}
+// - EQV(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectEqv<ValueType Vt> {
+  // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      151>;
+
+  // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      152>;
+
+  // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      154>;
+
+  // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+      158>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC
+// This class matches the equivalent ternary operation A ? f(B,C) : NOT(C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), B, XOR(B,C), OR(B,C),
+//   NOT(B), NAND(B,C)}
+// - NOT(C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotC<ValueType Vt> {
+  // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>;
+
+  // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>;
+
+  // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>;
+
+  // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>;
+
+  // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>;
+
+  // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB
+// This class matches the equivalent ternary operation A ? f(B,C) : NOT(B)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), C, XOR(B,C), OR(B,C),
+//   NOT(C), NAND(B,C)}
+// - NOT(B) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotB<ValueType Vt> {
+  // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>;
+
+  // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>;
+
+  // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>;
+
+  // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>;
+
+  // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202
+  def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>;
+
+  // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand
+// This class matches the equivalent ternary operation A ? f(B,C) : NAND(B,C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)}
+// - NAND(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNand<ValueType Vt> {
+  // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>;
+
+  // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>;
+
+  // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+      230>;
+
+  // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+      231>;
+
+  // Pattern: (A ? EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233
+  def : XXEvalPattern<
+      Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+      233>;
+}
+
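As a spot check with the truth-table helper sketched after XXEvalTernarySelectNor above: for A ? NOT(C) : NOT(B), the function is true on input rows (a,b,c) = (0,0,0), (0,0,1), (1,0,0) and (1,1,0), i.e. immediate bits 7, 6, 3 and 1, giving 128 + 64 + 8 + 2 = 202, which matches the XXEvalTernarySelectNotB immediate.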
 let Predicates = [PrefixInstrs, HasP10Vector] in {
   let AddedComplexity = 400 in {
     def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
@@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
       defm : XXEvalTernarySelectC<Ty>;
       defm : XXEvalTernarySelectXor<Ty>;
       defm : XXEvalTernarySelectOr<Ty>;
+      defm : XXEvalTernarySelectNor<Ty>;
+      defm : XXEvalTernarySelectEqv<Ty>;
+      defm : XXEvalTernarySelectNotC<Ty>;
+      defm : XXEvalTernarySelectNotB<Ty>;
+      defm : XXEvalTernarySelectNand<Ty>;
   }
 
   // Anonymous patterns to select prefixed VSX loads and stores.
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 8851a0f..e857b2d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
 
 bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) {
   return StringSwitch<bool>(Format)
-      .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true)
-      .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj",
+      .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true)
+      .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"},
              STI.hasFeature(RISCV::FeatureStdExtZca))
-      .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es",
+      .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"},
             !STI.hasFeature(RISCV::Feature64Bit))
       .Default(false);
 }
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 50730c6..ab93bba 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL";
 
 bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) {
   // Return true if Data is one of the valid LMUL strings.
   return StringSwitch<bool>(Data)
-      .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true)
+      .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true)
       .Default(false);
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f863392a..637d61fe 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
 // and floating point computation.
 // The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can
 // be one or two VA (Vector Arithmetic).
-multiclass SiFive7ProcResources<bit extraVALU = false> {
+multiclass SiFive7ProcResources<bit dualVALU = false> {
   let BufferSize = 0 in {
     def PipeA : ProcResource<1>;
     def PipeB : ProcResource<1>;
@@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
     def FDiv : ProcResource<1>; // FP Division/Sqrt
 
     // Arithmetic sequencer(s)
-    if extraVALU then {
+    if dualVALU then {
      // VA1 can handle any vector arithmetic instruction.
       def VA1 : ProcResource<1>;
       // VA2 generally can only handle simple vector arithmetic. 
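Several files in this patch migrate StringSwitch::Cases from a variadic argument list to an initializer-list first parameter. A minimal sketch of the new call shape, modeled on the LMUL check above and assuming the initializer-list overload of StringSwitch::Cases available in this tree:

#include "llvm/ADT/StringSwitch.h"

static bool isVectorLMUL(llvm::StringRef S) {
  // One braced list of keys per result value, then a single .Default.
  return llvm::StringSwitch<bool>(S)
      .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true)
      .Default(false);
}

Grouping the keys in a braced list also removes the old ten-argument limit that the Triple.cpp change below no longer needs to work around.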
@@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> { def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"), !cast<ProcResource>(NAME#"PipeB")]>; - if extraVALU then + if dualVALU then def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"), !cast<ProcResource>(NAME#"VA2")]>; } @@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance { /// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and /// (2) WriteRes entries. It's parameterized by config values that will /// eventually be supplied by different SchedMachineModels. -multiclass SiFive7SchedResources<int vlen, bit extraVALU, +multiclass SiFive7SchedResources<int vlen, bit dualVALU, SiFive7FPLatencies fpLatencies, bit hasFastGather> { - defm SiFive7 : SiFive7ProcResources<extraVALU>; + defm SiFive7 : SiFive7ProcResources<dualVALU>; // Pull out defs from SiFive7ProcResources so we can refer to them by name. defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA); @@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU, defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv); defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv); // Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU. - defvar SiFive7VA1 = !if (extraVALU, + defvar SiFive7VA1 = !if (dualVALU, !cast<ProcResource>(NAME # SiFive7VA1), !cast<ProcResource>(NAME # SiFive7VA)); - defvar SiFive7VA1OrVA2 = !if (extraVALU, + defvar SiFive7VA1OrVA2 = !if (dualVALU, !cast<ProcResGroup>(NAME # SiFive7VA1OrVA2), !cast<ProcResource>(NAME # SiFive7VA)); defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA); @@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel { HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, HasStdExtZkr]; int VLEN = vlen; - bit HasExtraVALU = false; + bit HasDualVALU = false; SiFive7FPLatencies FPLatencies; bit HasFastGather = false; @@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> { } def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { - let HasExtraVALU = true; + let HasDualVALU = true; let FPLatencies = SiFive7LowFPLatencies; let HasFastGather = true; } @@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> { /// Binding models to their scheduling resources. 
foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in { let SchedModel = model in - defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU, + defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU, model.FPLatencies, model.HasFastGather>; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index a466ab2..a0cff4d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform( SPIRV::Decoration::NonUniformEXT, {}); } } - return; } bool SPIRVInstructionSelector::extractSubvector( diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index 2934c88..fa08d44 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } } -static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) -{ +[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) if (MRI->isPhysRegUsed(reg)) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index d9c8e22..6e99fc3 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) { .Case("i64", wasm::ValType::I64) .Case("f32", wasm::ValType::F32) .Case("f64", wasm::ValType::F64) - .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2", + .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"}, wasm::ValType::V128) .Case("funcref", wasm::ValType::FUNCREF) .Case("externref", wasm::ValType::EXTERNREF) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index a8908d4..ac251fd 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, // xacquire <insn> ; xacquire must be accompanied by 'lock' bool IsPrefix = StringSwitch<bool>(Name) - .Cases("cs", "ds", "es", "fs", "gs", "ss", true) - .Cases("rex64", "data32", "data16", "addr32", "addr16", true) - .Cases("xacquire", "xrelease", true) - .Cases("acquire", "release", isParsingIntelSyntax()) + .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true) + .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true) + .Cases({"xacquire", "xrelease"}, true) + .Cases({"acquire", "release"}, isParsingIntelSyntax()) .Default(false); auto isLockRepeatNtPrefix = [](StringRef N) { return StringSwitch<bool>(N) - .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true) + .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"}, + true) .Default(false); }; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 2bf016a..6db780f 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1338,7 +1338,6 @@ def ProcessorFeatures { list<SubtargetFeature> PTLFeatures = !listremove(ARLSFeatures, [FeatureWIDEKL]); - // Clearwaterforest 
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, FeatureAVXVNNIINT16, @@ -1880,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel, } def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>; -def : ProcModel<"pantherlake", AlderlakePModel, +foreach P = ["pantherlake", "wildcatlake"] in { +def : ProcModel<P, AlderlakePModel, ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>; +} def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index e0991aa..9f88fda 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -602,8 +602,7 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, - const TableEntry &TE) { + [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) { return V < TE.from; } }; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c32b1a6..a0b64ff 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58342,11 +58342,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) { } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) { SDValue Src = Op1; SDValue Op10 = Op1.getOperand(0); - if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) { - // res, flags2 = sub 0, (and (xor X, -1), Y) + if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) && + llvm::isOneConstant(Op1.getOperand(1))) { + // res, flags2 = sub 0, (and (xor X, -1), 1) // cload/cstore ..., cond_ne, flag2 // -> - // res, flags2 = sub 0, (and X, Y) + // res, flags2 = sub 0, (and X, 1) // cload/cstore ..., cond_e, flag2 Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0), Op1.getOperand(1)); diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 0fd44b7..ec31675 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; + +// If the globaladdr is an absolute_symbol, don't bother using the sign extending +// instruction since there's no benefit to using it with absolute symbols. 
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{ + auto *GA = cast<GlobalAddressSDNode>(N); + return !GA->getGlobal()->getAbsoluteSymbolRange(); +}]>; +def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)), + (MOV64ri32 tglobaladdr:$dst)>, + Requires<[KernelCode]>; + def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp index 89d5e0d..f6cea85 100644 --- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp +++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp @@ -22,13 +22,13 @@ StringRef ARM::getArchSynonym(StringRef Arch) { .Case("v5e", "v5te") .Case("v6j", "v6") .Case("v6hl", "v6k") - .Cases("v6m", "v6sm", "v6s-m", "v6-m") - .Cases("v6z", "v6zk", "v6kz") - .Cases("v7", "v7a", "v7hl", "v7l", "v7-a") + .Cases({"v6m", "v6sm", "v6s-m"}, "v6-m") + .Cases({"v6z", "v6zk"}, "v6kz") + .Cases({"v7", "v7a", "v7hl", "v7l"}, "v7-a") .Case("v7r", "v7-r") .Case("v7m", "v7-m") .Case("v7em", "v7e-m") - .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a") + .Cases({"v8", "v8a", "v8l", "aarch64", "arm64"}, "v8-a") .Case("v8.1a", "v8.1-a") .Case("v8.2a", "v8.2-a") .Case("v8.3a", "v8.3-a") @@ -39,7 +39,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) { .Case("v8.8a", "v8.8-a") .Case("v8.9a", "v8.9-a") .Case("v8r", "v8-r") - .Cases("v9", "v9a", "v9-a") + .Cases({"v9", "v9a"}, "v9-a") .Case("v9.1a", "v9.1-a") .Case("v9.2a", "v9.2-a") .Case("v9.3a", "v9.3-a") diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index a5bdc9d..3479106 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -70,8 +70,8 @@ using namespace llvm; -static std::unique_ptr<llvm::MemoryBuffer> - LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { +[[maybe_unused]] static std::unique_ptr<llvm::MemoryBuffer> +getProcCpuinfoContent() { const char *CPUInfoFile = "/proc/cpuinfo"; if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO")) CPUInfoFile = CpuinfoIntercept; @@ -964,6 +964,13 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, *Subtype = X86::INTEL_COREI7_PANTHERLAKE; break; + // Wildcatlake: + case 0xd5: + CPU = "wildcatlake"; + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_PANTHERLAKE; + break; + // Graniterapids: case 0xad: CPU = "graniterapids"; diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp index 950bb2b..d765d9c 100644 --- a/llvm/lib/TargetParser/TargetDataLayout.cpp +++ b/llvm/lib/TargetParser/TargetDataLayout.cpp @@ -548,8 +548,11 @@ std::string Triple::computeDataLayout(StringRef ABIName) const { case Triple::csky: return computeCSKYDataLayout(*this); case Triple::dxil: + // TODO: We need to align vectors on the element size generally, but for now + // we hard code this for 3-element 32- and 64-bit vectors as a workaround. 
+ // See https://github.com/llvm/llvm-project/issues/123968 return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-" - "f32:32-f64:64-n8:16:32:64"; + "f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"; case Triple::hexagon: return "e-m:e-p:32:32:32-a:0-n16:32-" "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-" diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index f021094..1068ce4 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -579,87 +579,89 @@ static Triple::ArchType parseARMArch(StringRef ArchName) { } static Triple::ArchType parseArch(StringRef ArchName) { - auto AT = StringSwitch<Triple::ArchType>(ArchName) - .Cases("i386", "i486", "i586", "i686", Triple::x86) - // FIXME: Do we need to support these? - .Cases("i786", "i886", "i986", Triple::x86) - .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) - .Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc) - .Cases("powerpcle", "ppcle", "ppc32le", Triple::ppcle) - .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64) - .Cases("powerpc64le", "ppc64le", Triple::ppc64le) - .Case("xscale", Triple::arm) - .Case("xscaleeb", Triple::armeb) - .Case("aarch64", Triple::aarch64) - .Case("aarch64_be", Triple::aarch64_be) - .Case("aarch64_32", Triple::aarch64_32) - .Case("arc", Triple::arc) - .Case("arm64", Triple::aarch64) - .Case("arm64_32", Triple::aarch64_32) - .Case("arm64e", Triple::aarch64) - .Case("arm64ec", Triple::aarch64) - .Case("arm", Triple::arm) - .Case("armeb", Triple::armeb) - .Case("thumb", Triple::thumb) - .Case("thumbeb", Triple::thumbeb) - .Case("avr", Triple::avr) - .Case("m68k", Triple::m68k) - .Case("msp430", Triple::msp430) - .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6", - "mipsr6", Triple::mips) - .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el", - Triple::mipsel) - .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6", - "mips64r6", "mipsn32r6", Triple::mips64) - .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el", - "mipsn32r6el", Triple::mips64el) - .Case("r600", Triple::r600) - .Case("amdgcn", Triple::amdgcn) - .Case("riscv32", Triple::riscv32) - .Case("riscv64", Triple::riscv64) - .Case("riscv32be", Triple::riscv32be) - .Case("riscv64be", Triple::riscv64be) - .Case("hexagon", Triple::hexagon) - .Cases("s390x", "systemz", Triple::systemz) - .Case("sparc", Triple::sparc) - .Case("sparcel", Triple::sparcel) - .Cases("sparcv9", "sparc64", Triple::sparcv9) - .Case("tce", Triple::tce) - .Case("tcele", Triple::tcele) - .Case("xcore", Triple::xcore) - .Case("nvptx", Triple::nvptx) - .Case("nvptx64", Triple::nvptx64) - .Case("amdil", Triple::amdil) - .Case("amdil64", Triple::amdil64) - .Case("hsail", Triple::hsail) - .Case("hsail64", Triple::hsail64) - .Case("spir", Triple::spir) - .Case("spir64", Triple::spir64) - .Cases("spirv", "spirv1.5", "spirv1.6", Triple::spirv) - .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2", - "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", - "spirv32v1.6", Triple::spirv32) - .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2", - "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", - "spirv64v1.6", Triple::spirv64) - .StartsWith("kalimba", Triple::kalimba) - .Case("lanai", Triple::lanai) - .Case("renderscript32", Triple::renderscript32) - .Case("renderscript64", Triple::renderscript64) - .Case("shave", Triple::shave) - .Case("ve", Triple::ve) - .Case("wasm32", Triple::wasm32) - .Case("wasm64", Triple::wasm64) - .Case("csky", Triple::csky) - 
.Case("loongarch32", Triple::loongarch32) - .Case("loongarch64", Triple::loongarch64) - .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3", - "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", - "dxilv1.8", Triple::dxil) - // Note: Cases has max limit of 10. - .Case("dxilv1.9", Triple::dxil) - .Case("xtensa", Triple::xtensa) - .Default(Triple::UnknownArch); + auto AT = + StringSwitch<Triple::ArchType>(ArchName) + .Cases({"i386", "i486", "i586", "i686"}, Triple::x86) + // FIXME: Do we need to support these? + .Cases({"i786", "i886", "i986"}, Triple::x86) + .Cases({"amd64", "x86_64", "x86_64h"}, Triple::x86_64) + .Cases({"powerpc", "powerpcspe", "ppc", "ppc32"}, Triple::ppc) + .Cases({"powerpcle", "ppcle", "ppc32le"}, Triple::ppcle) + .Cases({"powerpc64", "ppu", "ppc64"}, Triple::ppc64) + .Cases({"powerpc64le", "ppc64le"}, Triple::ppc64le) + .Case("xscale", Triple::arm) + .Case("xscaleeb", Triple::armeb) + .Case("aarch64", Triple::aarch64) + .Case("aarch64_be", Triple::aarch64_be) + .Case("aarch64_32", Triple::aarch64_32) + .Case("arc", Triple::arc) + .Case("arm64", Triple::aarch64) + .Case("arm64_32", Triple::aarch64_32) + .Case("arm64e", Triple::aarch64) + .Case("arm64ec", Triple::aarch64) + .Case("arm", Triple::arm) + .Case("armeb", Triple::armeb) + .Case("thumb", Triple::thumb) + .Case("thumbeb", Triple::thumbeb) + .Case("avr", Triple::avr) + .Case("m68k", Triple::m68k) + .Case("msp430", Triple::msp430) + .Cases({"mips", "mipseb", "mipsallegrex", "mipsisa32r6", "mipsr6"}, + Triple::mips) + .Cases({"mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el"}, + Triple::mipsel) + .Cases({"mips64", "mips64eb", "mipsn32", "mipsisa64r6", "mips64r6", + "mipsn32r6"}, + Triple::mips64) + .Cases({"mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el", + "mipsn32r6el"}, + Triple::mips64el) + .Case("r600", Triple::r600) + .Case("amdgcn", Triple::amdgcn) + .Case("riscv32", Triple::riscv32) + .Case("riscv64", Triple::riscv64) + .Case("riscv32be", Triple::riscv32be) + .Case("riscv64be", Triple::riscv64be) + .Case("hexagon", Triple::hexagon) + .Cases({"s390x", "systemz"}, Triple::systemz) + .Case("sparc", Triple::sparc) + .Case("sparcel", Triple::sparcel) + .Cases({"sparcv9", "sparc64"}, Triple::sparcv9) + .Case("tce", Triple::tce) + .Case("tcele", Triple::tcele) + .Case("xcore", Triple::xcore) + .Case("nvptx", Triple::nvptx) + .Case("nvptx64", Triple::nvptx64) + .Case("amdil", Triple::amdil) + .Case("amdil64", Triple::amdil64) + .Case("hsail", Triple::hsail) + .Case("hsail64", Triple::hsail64) + .Case("spir", Triple::spir) + .Case("spir64", Triple::spir64) + .Cases({"spirv", "spirv1.5", "spirv1.6"}, Triple::spirv) + .Cases({"spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2", + "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", "spirv32v1.6"}, + Triple::spirv32) + .Cases({"spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2", + "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", "spirv64v1.6"}, + Triple::spirv64) + .StartsWith("kalimba", Triple::kalimba) + .Case("lanai", Triple::lanai) + .Case("renderscript32", Triple::renderscript32) + .Case("renderscript64", Triple::renderscript64) + .Case("shave", Triple::shave) + .Case("ve", Triple::ve) + .Case("wasm32", Triple::wasm32) + .Case("wasm64", Triple::wasm64) + .Case("csky", Triple::csky) + .Case("loongarch32", Triple::loongarch32) + .Case("loongarch64", Triple::loongarch64) + .Cases({"dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3", + "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8", + "dxilv1.9"}, + Triple::dxil) + .Case("xtensa", 
Triple::xtensa) + .Default(Triple::UnknownArch); // Some architectures require special parsing logic just to compute the // ArchType result. @@ -1071,7 +1073,7 @@ Triple::Triple(std::string &&Str) : Data(std::move(Str)) { .StartsWith("mips64", Triple::GNUABI64) .StartsWith("mipsisa64", Triple::GNUABI64) .StartsWith("mipsisa32", Triple::GNU) - .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU) + .Cases({"mips", "mipsel", "mipsr6", "mipsr6el"}, Triple::GNU) .Default(UnknownEnvironment); } } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 1932a3a..e382cfe 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -378,6 +378,7 @@ constexpr ProcInfo Processors[] = { { {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false }, // Pantherlake microarchitecture based processors. { {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false }, + { {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false }, // Sierraforest microarchitecture based processors. { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Grandridge microarchitecture based processors. diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 7c78eb3..444b390 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -396,9 +396,8 @@ class CHR { } // end anonymous namespace -static inline -raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, - const CHRStats &Stats) { +[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS, + const CHRStats &Stats) { Stats.print(OS); return OS; } @@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { return PSI.isFunctionEntryHot(&F); } -static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, - CHRStats *Stats) { +[[maybe_unused]] static void dumpIR(Function &F, const char *Label, + CHRStats *Stats) { StringRef FuncName = F.getName(); StringRef ModuleName = F.getParent()->getName(); (void)(FuncName); // Unused in release build. @@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope, } // Assert that all the CHR regions of the scope have a biased branch or select. -static void LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { #ifndef NDEBUG auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { @@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { // Assert that all the condition values of the biased branches and selects have // been hoisted to the pre-entry block or outside of the scope. 
-static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( - CHRScope *Scope, BasicBlock *PreEntryBlock) { +[[maybe_unused]] static void +assertBranchOrSelectConditionHoisted(CHRScope *Scope, + BasicBlock *PreEntryBlock) { CHR_DEBUG(dbgs() << "Biased regions condition values \n"); for (RegInfo &RI : Scope->CHRRegions) { Region *R = RI.R; @@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { } } -static void LLVM_ATTRIBUTE_UNUSED -dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { +[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, + const char *Label) { dbgs() << Label << " " << Scopes.size() << "\n"; for (CHRScope *Scope : Scopes) { dbgs() << *Scope << "\n"; diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 09db464..386e48f 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff; namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, - BBState &BBState) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState); } // end namespace llvm diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h index 232db2b..5cc4212 100644 --- a/llvm/lib/Transforms/ObjCARC/PtrState.h +++ b/llvm/lib/Transforms/ObjCARC/PtrState.h @@ -47,8 +47,7 @@ enum Sequence { S_MovableRelease ///< objc_release(x), !clang.imprecise_release. }; -raw_ostream &operator<<(raw_ostream &OS, - const Sequence S) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S); /// Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 7ad710d..6141b6d 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -77,6 +77,7 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, return nullptr; } -namespace { // Returns true if \p I is an intrinsic that does not read or write memory. -bool isNoopIntrinsic(Instruction *I) { +static bool isNoopIntrinsic(Instruction *I) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: @@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) { } // Check if we can ignore \p D for DSE. -bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { +static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { Instruction *DI = D->getMemoryInst(); // Calls that only access inaccessible memory cannot read or write any memory // locations we consider for elimination. @@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return false; } +namespace { + // A memory location wrapper that represents a MemoryLocation, `MemLoc`, // defined by `MemDef`. 
struct MemoryLocationWrapper { @@ -889,23 +891,25 @@ struct MemoryDefWrapper { SmallVector<MemoryLocationWrapper, 1> DefinedLocations; }; -bool hasInitializesAttr(Instruction *I) { - CallBase *CB = dyn_cast<CallBase>(I); - return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); -} - struct ArgumentInitInfo { unsigned Idx; bool IsDeadOrInvisibleOnUnwind; ConstantRangeList Inits; }; +} // namespace + +static bool hasInitializesAttr(Instruction *I) { + CallBase *CB = dyn_cast<CallBase>(I); + return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); +} // Return the intersected range list of the initializes attributes of "Args". // "Args" are call arguments that alias to each other. // If any argument in "Args" doesn't have dead_on_unwind attr and // "CallHasNoUnwindAttr" is false, return empty. -ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, - bool CallHasNoUnwindAttr) { +static ConstantRangeList +getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, + bool CallHasNoUnwindAttr) { if (Args.empty()) return {}; @@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, return IntersectedIntervals; } +namespace { + struct DSEState { Function &F; AliasAnalysis &AA; @@ -2328,10 +2334,11 @@ struct DSEState { // change state: whether make any change. bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper); }; +} // namespace // Return true if "Arg" is function local and isn't captured before "CB". -bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, - EarliestEscapeAnalysis &EA) { +static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, + EarliestEscapeAnalysis &EA) { const Value *UnderlyingObj = getUnderlyingObject(Arg); return isIdentifiedFunctionLocal(UnderlyingObj) && capturesNothing( @@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, return MadeChange; } -} // end anonymous namespace //===----------------------------------------------------------------------===// // DSE Pass @@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, false) -namespace llvm { -LLVM_ABI FunctionPass *createDeadStoreEliminationPass() { +LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSELegacyPass(); } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index 1c88532..b9534def 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -73,24 +73,17 @@ #include <utility> using namespace llvm; +using namespace llvm::GVNExpression; #define DEBUG_TYPE "gvn-sink" STATISTIC(NumRemoved, "Number of instructions removed"); -namespace llvm { -namespace GVNExpression { - LLVM_DUMP_METHOD void Expression::dump() const { print(dbgs()); dbgs() << "\n"; } -} // end namespace GVNExpression -} // end namespace llvm - -namespace { - static bool isMemoryInst(const Instruction *I) { return isa<LoadInst>(I) || isa<StoreInst>(I) || (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) || @@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) { //===----------------------------------------------------------------------===// +namespace { + /// Candidate solution for sinking. 
There may be different ways to /// sink instructions, differing in the number of instructions sunk, /// the number of predecessors sunk from and the number of PHIs @@ -125,14 +120,6 @@ struct SinkingInstructionCandidate { } }; -#ifndef NDEBUG -raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) { - OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks - << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; - return OS; -} -#endif - //===----------------------------------------------------------------------===// /// Describes a PHI node that may or may not exist. These track the PHIs @@ -256,8 +243,18 @@ public: return Values == Other.Values && Blocks == Other.Blocks; } }; +} // namespace -template <typename ModelledPHI> struct DenseMapInfo { +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks + << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; + return OS; +} +#endif + +template <> struct llvm::DenseMapInfo<ModelledPHI> { static inline ModelledPHI &getEmptyKey() { static ModelledPHI Dummy = ModelledPHI::createDummy(0); return Dummy; @@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo { } }; -using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>; +using ModelledPHISet = DenseSet<ModelledPHI>; + +namespace { //===----------------------------------------------------------------------===// // ValueTable @@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>; /// /// This class also contains fields for discriminators used when determining /// equivalence of instructions with sideeffects. -class InstructionUseExpr : public GVNExpression::BasicExpression { +class InstructionUseExpr : public BasicExpression { unsigned MemoryUseOrder = -1; bool Volatile = false; ArrayRef<int> ShuffleMask; @@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression { public: InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R, BumpPtrAllocator &A) - : GVNExpression::BasicExpression(I->getNumUses()) { + : BasicExpression(I->getNumUses()) { allocateOperands(R, A); setOpcode(I->getOpcode()); setType(I->getType()); @@ -315,8 +314,8 @@ public: void setVolatile(bool V) { Volatile = V; } hash_code getHashValue() const override { - return hash_combine(GVNExpression::BasicExpression::getHashValue(), - MemoryUseOrder, Volatile, ShuffleMask); + return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder, + Volatile, ShuffleMask); } template <typename Function> hash_code getHashValue(Function MapFn) { @@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>; class ValueTable { DenseMap<Value *, uint32_t> ValueNumbering; - DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering; + DenseMap<Expression *, uint32_t> ExpressionNumbering; DenseMap<size_t, uint32_t> HashNumbering; BumpPtrAllocator Allocator; ArrayRecycler<Value *> Recycler; @@ -594,6 +593,7 @@ private: } } }; +} // namespace std::optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI, @@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, NumRemoved += Insts.size() - 1; } -} // end anonymous namespace - PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { GVNSink G; if (!G.run(F)) diff --git 
a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 578fec7..a692009 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -551,12 +551,10 @@ public:
   }
 
   /// Support comparison with a single offset to allow binary searches.
-  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
-                                              uint64_t RHSOffset) {
+  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
     return LHS.beginOffset() < RHSOffset;
   }
-  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
-                                              const Slice &RHS) {
+  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
     return LHSOffset < RHS.beginOffset();
   }
 
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b18acea..4fe736a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
       }
 
       Phi.replaceAllUsesWith(RdxResult);
-      continue;
     }
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b96d29e..62a81ba 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // the vector loop or when not folding the tail. In the latter case, we know
  // that the canonical induction increment will not overflow as the vector trip
 // count is >= increment and a multiple of the increment.
+  VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
   bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
   if (!HasNUW) {
-    auto *IVInc = Plan->getVectorLoopRegion()
-                      ->getExitingBasicBlock()
-                      ->getTerminator()
-                      ->getOperand(0);
-    assert(match(IVInc, m_VPInstruction<Instruction::Add>(
-                            m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
+    auto *IVInc =
+        LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
+    assert(match(IVInc,
+                 m_VPInstruction<Instruction::Add>(
+                     m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
            "Did not find the canonical IV increment");
     cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
   }
@@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
 
   // Scan the body of the loop in a topological order to visit each basic block
   // after having visited its predecessor basic blocks. 
- VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( HeaderVPBB); @@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( for (VPValue *Old : Old2New.keys()) Old->getDefiningRecipe()->eraseFromParent(); - assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) && - !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() && + assert(isa<VPRegionBlock>(LoopRegion) && + !LoopRegion->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); @@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { if (ResumePhiIter == MainScalarPH->phis().end()) { VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); ResumePhi = ScalarPHBuilder.createScalarPhi( - {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, - "vec.epilog.resume.val"); + {VectorTC, + MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()}, + {}, "vec.epilog.resume.val"); } else { ResumePhi = cast<VPPhi>(&*ResumePhiIter); if (MainScalarPH->begin() == MainScalarPH->end()) @@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); - VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV(); + VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV(); // When vectorizing the epilogue loop, the canonical induction needs to be // adjusted by the value after the main vector loop. Find the resume value // created during execution of the main VPlan. It must be the first phi in the diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 23f5623..0e0b042 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1012,6 +1012,8 @@ public: // part if scalar. In the latter case, the recipe will be removed during // unrolling. ExtractLastElement, + // Extracts the last lane for each part from its operand. + ExtractLastLanePerPart, // Extracts the second-to-last lane from its operand or the second-to-last // part if it is scalar. In the latter case, the recipe will be removed // during unrolling. @@ -4058,6 +4060,19 @@ public: /// Remove the current region from its VPlan, connecting its predecessor to /// its entry, and its exiting block to its successor. void dissolveToCFGLoop(); + + /// Returns the canonical induction recipe of the region. + VPCanonicalIVPHIRecipe *getCanonicalIV() { + VPBasicBlock *EntryVPBB = getEntryBasicBlock(); + if (EntryVPBB->empty()) { + // VPlan native path. TODO: Unify both code paths. + EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor()); + } + return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin()); + } + const VPCanonicalIVPHIRecipe *getCanonicalIV() const { + return const_cast<VPRegionBlock *>(this)->getCanonicalIV(); + } }; /// VPlan models a candidate for vectorization, encoding various decisions take @@ -4252,12 +4267,14 @@ public: BackedgeTakenCount = new VPValue(); return BackedgeTakenCount; } + VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; } /// The vector trip count. VPValue &getVectorTripCount() { return VectorTripCount; } /// Returns the VF of the vector loop region. 
VPValue &getVF() { return VF; }; + const VPValue &getVF() const { return VF; }; /// Returns VF * UF of the vector loop region. VPValue &getVFxUF() { return VFxUF; } @@ -4369,16 +4386,6 @@ public: LLVM_DUMP_METHOD void dump() const; #endif - /// Returns the canonical induction recipe of the vector loop. - VPCanonicalIVPHIRecipe *getCanonicalIV() { - VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); - if (EntryVPBB->empty()) { - // VPlan native path. - EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor()); - } - return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin()); - } - VPValue *getSCEVExpansion(const SCEV *S) const { return SCEVToExpansion.lookup(S); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 07bfe7a..f413c63 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::FirstActiveLane: return Type::getIntNTy(Ctx, 64); case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: { Type *BaseTy = inferScalarType(R->getOperand(0)); if (auto *VecTy = dyn_cast<VectorType>(BaseTy)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index c0147ce..332791a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, } VPIRMetadata VPBranchWeights; - auto *Term = VPBuilder(CheckBlockVPBB) - .createNaryOp(VPInstruction::BranchOnCond, {CondVPV}, - Plan.getCanonicalIV()->getDebugLoc()); + auto *Term = + VPBuilder(CheckBlockVPBB) + .createNaryOp( + VPInstruction::BranchOnCond, {CondVPV}, + Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc()); if (AddBranchWeights) { MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = @@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) { if (DerivedIV->getNumUsers() == 1 && DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) { - auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), - &Plan.getVectorTripCount()); + auto *NewSel = Builder.createSelect( + AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount()); DerivedIV->moveAfter(&*Builder.getInsertPoint()); DerivedIV->setOperand(1, NewSel); continue; @@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { "FMaxNum/FMinNum reduction.\n"); return false; } - auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV); + auto *NewSel = + Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV); ResumeR->setOperand(0, NewSel); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b42b049..ff286f7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -372,6 +372,12 @@ m_ExtractLastElement(const Op0_t &Op0) { return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0); } +template <typename Op0_t> +inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t> +m_ExtractLastLanePerPart(const Op0_t &Op0) { + return 
m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0); +} + template <typename Op0_t, typename Op1_t, typename Op2_t> inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t> m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { @@ -394,6 +400,12 @@ m_AnyOf(const Op0_t &Op0) { return m_VPInstruction<VPInstruction::AnyOf>(Op0); } +template <typename Op0_t> +inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t> +m_FirstActiveLane(const Op0_t &Op0) { + return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0); +} + template <unsigned Opcode, typename Op0_t> inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) { return AllRecipe_match<Opcode, Op0_t>(Op0); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 0c27d53..fb17d5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) { // non-phi instructions. auto &Plan = *HeaderVPBB->getPlan(); - auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV()); + auto *IV = + new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV()); Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi()); Builder.insert(IV); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2368d18..775837f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExplicitVectorLength: case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::FirstActiveLane: case VPInstruction::Not: @@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) { return ReducedPartRdx; } + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractLastElement: case VPInstruction::ExtractPenultimateElement: { - unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2; + unsigned Offset = + getOpcode() == VPInstruction::ExtractPenultimateElement ? 
2 : 1; Value *Res; if (State.VF.isVector()) { assert(Offset <= State.VF.getKnownMinValue() && @@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractLastElement || + getOpcode() == VPInstruction::ExtractLastLanePerPart || getOpcode() == VPInstruction::ExtractPenultimateElement || getOpcode() == Instruction::ExtractElement || getOpcode() == VPInstruction::ExtractLane || @@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExtractLane: case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::ActiveLaneMask: case VPInstruction::FirstActiveLane: @@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractLastElement: O << "extract-last-element"; break; + case VPInstruction::ExtractLastLanePerPart: + O << "extract-last-lane-per-part"; + break; case VPInstruction::ExtractPenultimateElement: O << "extract-penultimate-element"; break; @@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const { return false; auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); - auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin()); + auto *CanIV = getParent()->getParent()->getCanonicalIV(); return StartC && StartC->isZero() && StepC && StepC->isOne() && getScalarType() == CanIV->getScalarType(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 40b7e8d..8d76b2d8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) { /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV /// recipe, if it exists. 
static void removeRedundantCanonicalIVs(VPlan &Plan) { - VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; for (VPUser *U : CanonicalIV->users()) { WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); @@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { if (!WidenNewIV) return; - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); @@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder) { - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); + VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); VPSingleDefRecipe *BaseIV = Builder.createDerivedIV( Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx"); @@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, ScalarEvolution &SE) { VPValue *Incoming, *Mask; if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>( - m_VPInstruction<VPInstruction::FirstActiveLane>( - m_VPValue(Mask)), - m_VPValue(Incoming)))) + m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming)))) return nullptr; auto *WideIV = getOptimizableIVOf(Incoming, SE); @@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, return nullptr; // Calculate the final index. - VPValue *EndValue = Plan.getCanonicalIV(); - auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *CanonicalIV = LoopRegion->getCanonicalIV(); + Type *CanonicalIVType = CanonicalIV->getScalarType(); VPBuilder B(cast<VPBasicBlock>(PredVPBB)); DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc(); @@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane); FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType, FirstActiveLaneType, DL); - EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL); + VPValue *EndValue = + B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL); // `getOptimizableIVOf()` always returns the pre-incremented IV, so if it // changed it means the exit is using the incremented value, so we need to @@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { } // Look through ExtractLastElement (BuildVector ....). 
@@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   }
 
   // Look through ExtractLastElement (BuildVector ....).
-  if (match(&R, m_ExtractLastElement(m_BuildVector()))) {
+  if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+                            m_ExtractLastLanePerPart(m_BuildVector())))) {
     auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
     Def->replaceAllUsesWith(
         BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
-  if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) {
+  if (match(Def,
+            m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
+                        m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
     Def->replaceAllUsesWith(A);
     return;
   }
 
-  if (match(Def,
-            m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) &&
+  if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
+                             m_ExtractLastLanePerPart(m_VPValue(A)))) &&
       ((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
        (isa<VPReplicateRecipe>(A) &&
         cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
       [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
     return Def->replaceAllUsesWith(A);
   }
+
+  if (Plan->getUF() == 1 &&
+      match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
+    return Def->replaceAllUsesWith(
+        Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
+  }
 }
 
 void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
           RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
           true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
       Clone->insertBefore(RepOrWidenR);
-      auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement,
-                                    {Clone->getOperand(0)});
+      unsigned ExtractOpc =
+          vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
+              ? VPInstruction::ExtractLastElement
+              : VPInstruction::ExtractLastLanePerPart;
+      auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
       Ext->insertBefore(Clone);
       Clone->setOperand(0, Ext);
       RepR->eraseFromParent();
@@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
         !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
           return U->usesScalars(RepOrWidenR) ||
                  match(cast<VPRecipeBase>(U),
-                       m_ExtractLastElement(m_VPValue()));
+                       m_CombineOr(m_ExtractLastElement(m_VPValue()),
+                                   m_ExtractLastLanePerPart(m_VPValue())));
         }))
       continue;
@@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
       return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
     });
 
-  auto *CanIV = Plan.getCanonicalIV();
+  auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV();
   if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ,
                                   m_Specific(CanIV->getBackedgeValue()),
                                   m_Specific(&Plan.getVectorTripCount()))))
@@ -2319,7 +2334,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
     VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
   VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
-  auto *CanonicalIVPHI = Plan.getCanonicalIV();
+  auto *CanonicalIVPHI = TopRegion->getCanonicalIV();
   VPValue *StartV = CanonicalIVPHI->getStartValue();
 
   auto *CanonicalIVIncrement =
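For context on the ActiveLaneMask operands used in the next hunk: an active lane mask enables lane L exactly while Base + L is below the trip count, matching the documented llvm.get.active.lane.mask semantics; this patch threads a constant multiplier of 1 through as the extra operand. A self-contained scalar sketch (activeLaneMask is a hypothetical helper, not VPlan code):

// Lane L is active iff Base + L < TC; the tail iteration gets a partial mask.
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<bool> activeLaneMask(uint64_t Base, uint64_t TC,
                                        unsigned VF) {
  std::vector<bool> Mask(VF);
  for (unsigned L = 0; L != VF; ++L)
    Mask[L] = Base + L < TC;
  return Mask;
}

int main() {
  // Trip count 10, VF 4: the final iteration at base 8 enables lanes 0-1 only.
  for (uint64_t Base = 0; Base < 10; Base += 4) {
    printf("base %llu:", (unsigned long long)Base);
    for (bool B : activeLaneMask(Base, 10, 4))
      printf(" %d", B ? 1 : 0);
    printf("\n");
  }
  return 0;
}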
@@ -2358,7 +2373,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
 
   // Create the active lane mask instruction in the VPlan preheader.
   VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
-      ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+      ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1));
   auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
                                         {EntryIncrement, TC, ALMMultiplier},
                                         DL, "active.lane.mask.entry");
@@ -2394,13 +2409,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
 /// TODO: Introduce explicit recipe for header-mask instead of searching
 /// for the header-mask pattern manually.
 static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   SmallVector<VPValue *> WideCanonicalIVs;
-  auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
-                                            IsaPred<VPWidenCanonicalIVRecipe>);
-  assert(count_if(Plan.getCanonicalIV()->users(),
+  auto *FoundWidenCanonicalIVUser = find_if(
+      LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
+  assert(count_if(LoopRegion->getCanonicalIV()->users(),
                   IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
          "Must have at most one VPWideCanonicalIVRecipe");
-  if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
+  if (FoundWidenCanonicalIVUser !=
+      LoopRegion->getCanonicalIV()->users().end()) {
     auto *WideCanonicalIV =
         cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
     WideCanonicalIVs.push_back(WideCanonicalIV);
@@ -2408,7 +2425,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
 
   // Also include VPWidenIntOrFpInductionRecipes that represent a widened
   // version of the canonical induction.
-  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
     if (WidenOriginalIV && WidenOriginalIV->isCanonical())
@@ -2441,8 +2458,9 @@ void VPlanTransforms::addActiveLaneMask(
          "DataAndControlFlowWithoutRuntimeCheck implies "
          "UseActiveLaneMaskForControlFlow");
 
-  auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
-                                            IsaPred<VPWidenCanonicalIVRecipe>);
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  auto *FoundWidenCanonicalIVUser = find_if(
+      LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
   assert(FoundWidenCanonicalIVUser &&
          "Must have widened canonical IV when tail folding!");
   VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
@@ -2455,7 +2473,7 @@ void VPlanTransforms::addActiveLaneMask(
   } else {
     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
     VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
-        ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+        ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1));
     LaneMask =
         B.createNaryOp(VPInstruction::ActiveLaneMask,
                        {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2565,9 +2583,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   });
 
   assert(all_of(Plan.getVFxUF().users(),
-                [&Plan](VPUser *U) {
-                  return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
-                                          m_Specific(&Plan.getVFxUF()))) ||
+                [&LoopRegion, &Plan](VPUser *U) {
+                  return match(U,
+                               m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
+                                       m_Specific(&Plan.getVFxUF()))) ||
                          isa<VPWidenPointerInductionRecipe>(U);
                 }) &&
          "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
@@ -2722,9 +2741,10 @@ void VPlanTransforms::addExplicitVectorLength(
     VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
   if (Plan.hasScalarVFOnly())
     return;
-  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
 
-  auto *CanonicalIVPHI = Plan.getCanonicalIV();
+  auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
   auto *CanIVTy = CanonicalIVPHI->getScalarType();
   VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -4164,7 +4184,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
 
   // Adjust induction to reflect that the transformed plan only processes one
   // original iteration.
-  auto *CanIV = Plan.getCanonicalIV();
+  auto *CanIV = VectorLoop->getCanonicalIV();
   auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
   VPBuilder PHBuilder(Plan.getVectorPreheader());
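The recurring mechanical change in this file is retiring plan-level canonical-IV lookups in favor of VPRegionBlock::getCanonicalIV(), reached via getVectorLoopRegion() and usually cached in a LoopRegion local. A minimal model of that ownership move, using hypothetical Plan, LoopRegion and CanonicalIVPhi types rather than the real classes:

// The region, not the plan, now answers "what is the canonical IV?".
#include <cassert>
#include <string>

struct CanonicalIVPhi {
  std::string ScalarType = "i64";
};

struct LoopRegion {
  // Modeled as a direct member; in VPlan it is the header block's first phi.
  CanonicalIVPhi HeaderPhi;
  CanonicalIVPhi *getCanonicalIV() { return &HeaderPhi; }
};

struct Plan {
  LoopRegion Region;
  LoopRegion *getVectorLoopRegion() { return &Region; }
  // No plan-level getCanonicalIV(): call sites go through the region,
  // mirroring the mechanical rewrite in this diff.
};

int main() {
  Plan P;
  LoopRegion *LR = P.getVectorLoopRegion();
  assert(LR->getCanonicalIV()->ScalarType == "i64");
  (void)LR;
  return 0;
}

Caching the region in a local, as the diff does, keeps repeated queries (canonical IV, entry block, exiting block) on one lookup path.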
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 1c4adfc..5aeda3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -69,7 +69,8 @@ class UnrollState {
                            VPBasicBlock::iterator InsertPtForPhi);
 
   VPValue *getConstantVPV(unsigned Part) {
-    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+    Type *CanIVIntTy =
+        Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
     return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
   }
 
@@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
     // Compute*Result which combine all parts to compute the final value.
     VPValue *Op1;
     if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
-        match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
-                      m_VPValue(Op1))) ||
+        match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
         match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                       m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
         match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 66748c5..8b1b0e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
   return Expanded;
 }
 
-bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
+bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
   if (isa<VPActiveLaneMaskPHIRecipe>(V))
     return true;
 
@@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
 
   if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
     return B == Plan.getTripCount() &&
-           (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(),
-                                     m_Specific(&Plan.getVF()))) ||
+           (match(A,
+                  m_ScalarIVSteps(
+                      m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+                      m_One(), m_Specific(&Plan.getVF()))) ||
            IsWideCanonicalIV(A));
 
   return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
-         B == Plan.getOrCreateBackedgeTakenCount();
+         B == Plan.getBackedgeTakenCount();
 }
 
 const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
@@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
     return all_of(R->operands(), isUniformAcrossVFsAndUFs);
   }
 
-  auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
+  auto *CanonicalIV =
+      R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
   // Canonical IV chain is uniform.
   if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
     return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0a..cf95ac0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
 }
 
 /// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(const VPValue *V, VPlan &Plan);
+bool isHeaderMask(const VPValue *V, const VPlan &Plan);
 
 /// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
 /// as such if it is either loop invariant (defined outside the vector region)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5262af6..91734a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
         return false;
       }
     }
-    if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
-      if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength &&
-          !verifyEVLRecipe(*EVL)) {
-        errs() << "EVL VPValue is not used correctly\n";
-        return false;
+    if (const auto *VPI = dyn_cast<VPInstruction>(&R)) {
+      switch (VPI->getOpcode()) {
+      case VPInstruction::ExplicitVectorLength:
+        if (!verifyEVLRecipe(*VPI)) {
+          errs() << "EVL VPValue is not used correctly\n";
+          return false;
+        }
+        break;
+      default:
+        break;
       }
     }
   }
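The verifier rewrite above trades the single-purpose if on ExplicitVectorLength for a switch over the VPInstruction opcode, leaving room for further per-opcode checks. A compilable miniature of that shape (Opcode, verifyEVL and verifyInstruction are stand-ins, not the real verifier API):

// Adding a new per-opcode check now means adding a case, not another if-chain.
#include <cstdio>

enum class Opcode { ExplicitVectorLength, ExtractLastLanePerPart, Other };

static bool verifyEVL() { return true; } // stand-in for verifyEVLRecipe()

static bool verifyInstruction(Opcode Opc) {
  switch (Opc) {
  case Opcode::ExplicitVectorLength:
    if (!verifyEVL()) {
      fprintf(stderr, "EVL VPValue is not used correctly\n");
      return false;
    }
    break;
  default:
    break; // other opcodes: no extra structural checks yet
  }
  return true;
}

int main() { return verifyInstruction(Opcode::ExplicitVectorLength) ? 0 : 1; }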