Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/MLInlineAdvisor.cpp | 18
-rw-r--r--  llvm/lib/Analysis/StaticDataProfileInfo.cpp | 117
-rw-r--r--  llvm/lib/AsmParser/LLParser.cpp | 39
-rw-r--r--  llvm/lib/BinaryFormat/XCOFF.cpp | 30
-rw-r--r--  llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 6
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1
-rw-r--r--  llvm/lib/CAS/OnDiskTrieRawHashMap.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/BranchRelaxation.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/MachineCopyPropagation.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 82
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 4
-rw-r--r--  llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 8
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp | 53
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp | 26
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp | 22
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp | 28
-rw-r--r--  llvm/lib/IR/AsmWriter.cpp | 11
-rw-r--r--  llvm/lib/MC/MCObjectFileInfo.cpp | 7
-rw-r--r--  llvm/lib/ProfileData/InstrProf.cpp | 2
-rw-r--r--  llvm/lib/Support/PrettyStackTrace.cpp | 2
-rw-r--r--  llvm/lib/Support/SourceMgr.cpp | 24
-rw-r--r--  llvm/lib/TableGen/Main.cpp | 2
-rw-r--r--  llvm/lib/TableGen/Parser.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 2
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 108
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 15
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp | 12
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp | 10
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 140
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 3
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrP10.td | 189
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 20
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 1
-rw-r--r--  llvm/lib/Target/Sparc/SparcFrameLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 45
-rw-r--r--  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 11
-rw-r--r--  llvm/lib/Target/X86/X86.td | 5
-rw-r--r--  llvm/lib/Target/X86/X86FloatingPoint.cpp | 3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 7
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 13
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParserCommon.cpp | 10
-rw-r--r--  llvm/lib/TargetParser/Host.cpp | 11
-rw-r--r--  llvm/lib/TargetParser/TargetDataLayout.cpp | 5
-rw-r--r--  llvm/lib/TargetParser/Triple.cpp | 166
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 20
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 3
-rw-r--r--  llvm/lib/Transforms/ObjCARC/PtrState.h | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 36
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp | 48
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnroll.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 24
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 27
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 15
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 88
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 13
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 15
83 files changed, 1174 insertions, 525 deletions
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index f90717d..1d1a5560 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -61,6 +61,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
cl::Hidden, cl::init(""));
+static cl::opt<bool> StopImmediatelyForTest("ml-inliner-stop-immediately",
+ cl::Hidden);
+
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
@@ -214,6 +217,7 @@ MLInlineAdvisor::MLInlineAdvisor(
return;
}
ModelRunner->switchContext("");
+ ForceStop = StopImmediatelyForTest;
}
unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
@@ -379,9 +383,17 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) {
- if (!PSI.isFunctionEntryCold(&Caller))
- return std::make_unique<InlineAdvice>(this, CB, ORE,
- GetDefaultAdvice(CB));
+ if (!PSI.isFunctionEntryCold(&Caller)) {
+ // Return an MLInlineAdvice, despite delegating to the default advice,
+ // because we need to keep track of the internal state. This is different
+ // from the other instances where we return a "default" InlineAdvice,
+ // which happen at points where we won't come back to the MLAdvisor for
+ // decisions requiring that state.
+ return ForceStop ? std::make_unique<InlineAdvice>(this, CB, ORE,
+ GetDefaultAdvice(CB))
+ : std::make_unique<MLInlineAdvice>(this, CB, ORE,
+ GetDefaultAdvice(CB));
+ }
}
auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
// If this is a "never inline" case, there won't be any changes to internal
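
The hunk above returns an MLInlineAdvice even when the recommendation comes from the default policy, because recording that advice is what feeds state back into the advisor. A standalone sketch of that shape, using toy stand-ins rather than the real LLVM class hierarchy:

#include <memory>

// Toy model: Advice/MLAdvice stand in for InlineAdvice/MLInlineAdvice.
struct Advice {
  explicit Advice(bool R) : Recommend(R) {}
  virtual ~Advice() = default;
  bool Recommend;
};
struct MLAdvice : Advice {
  using Advice::Advice; // in LLVM, recording this variant updates advisor state
};

std::unique_ptr<Advice> makeAdvice(bool ForceStop, bool Recommend) {
  // Mirrors the ternary above: once force-stopped, plain advice suffices.
  return ForceStop ? std::make_unique<Advice>(Recommend)
                   : std::make_unique<MLAdvice>(Recommend);
}
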
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index 1f751ee..61d4935 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -1,10 +1,14 @@
#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
+#define DEBUG_TYPE "static-data-profile-info"
+
using namespace llvm;
namespace llvm {
@@ -60,6 +64,47 @@ void StaticDataProfileInfo::addConstantProfileCount(
OriginalCount = getInstrMaxCountValue();
}
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
+ const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const {
+ // The accumulated counter shows the constant is hot. Return enum 'hot'
+ // whether this variable is seen by unprofiled functions or not.
+ if (PSI->isHotCount(Count))
+ return StaticDataHotness::Hot;
+ // The constant is not hot, but it is seen by unprofiled functions, so the
+ // accumulated count may be an underestimate. We don't want to assign it to
+ // unlikely sections even if the counter says 'cold', so return
+ // enum 'LukewarmOrUnknown'.
+ if (ConstantWithoutCounts.count(C))
+ return StaticDataHotness::LukewarmOrUnknown;
+ // The accumulated counter shows the constant is cold, so return enum 'cold'.
+ if (PSI->isColdCount(Count))
+ return StaticDataHotness::Cold;
+
+ return StaticDataHotness::LukewarmOrUnknown;
+}
+
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getSectionHotnessUsingDataAccessProfile(
+ std::optional<StringRef> MaybeSectionPrefix) const {
+ if (!MaybeSectionPrefix)
+ return StaticDataHotness::LukewarmOrUnknown;
+ StringRef Prefix = *MaybeSectionPrefix;
+ assert((Prefix == "hot" || Prefix == "unlikely") &&
+ "Expect section_prefix to be one of hot or unlikely");
+ return Prefix == "hot" ? StaticDataHotness::Hot : StaticDataHotness::Cold;
+}
+
+StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
+ switch (Hotness) {
+ case StaticDataHotness::Cold:
+ return "unlikely";
+ case StaticDataHotness::Hot:
+ return "hot";
+ default:
+ return "";
+ }
+}
+
std::optional<uint64_t>
StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
auto I = ConstantProfileCounts.find(C);
@@ -70,27 +115,67 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
StringRef StaticDataProfileInfo::getConstantSectionPrefix(
const Constant *C, const ProfileSummaryInfo *PSI) const {
- auto Count = getConstantProfileCount(C);
+ std::optional<uint64_t> Count = getConstantProfileCount(C);
+
+#ifndef NDEBUG
+ auto DbgPrintPrefix = [](StringRef Prefix) {
+ return Prefix.empty() ? "<empty>" : Prefix;
+ };
+#endif
+
+ if (EnableDataAccessProf) {
+ // Module flag `EnableDataAccessProf` is 1 -> empty section prefix means
+ // unknown hotness except for string literals.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C);
+ GV && llvm::memprof::IsAnnotationOK(*GV) &&
+ !GV->getName().starts_with(".str")) {
+ auto HotnessFromDataAccessProf =
+ getSectionHotnessUsingDataAccessProfile(GV->getSectionPrefix());
+
+ if (!Count) {
+ StringRef Prefix = hotnessToStr(HotnessFromDataAccessProf);
+ LLVM_DEBUG(dbgs() << GV->getName() << " has section prefix "
+ << DbgPrintPrefix(Prefix)
+ << ", solely from data access profiles\n");
+ return Prefix;
+ }
+
+ // Both data access profiles and PGO counters are available. Use the
+ // hotter one.
+ auto HotnessFromPGO = getConstantHotnessUsingProfileCount(C, PSI, *Count);
+ StaticDataHotness GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+ if (HotnessFromDataAccessProf == StaticDataHotness::Hot ||
+ HotnessFromPGO == StaticDataHotness::Hot) {
+ GlobalVarHotness = StaticDataHotness::Hot;
+ } else if (HotnessFromDataAccessProf ==
+ StaticDataHotness::LukewarmOrUnknown ||
+ HotnessFromPGO == StaticDataHotness::LukewarmOrUnknown) {
+ GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+ } else {
+ GlobalVarHotness = StaticDataHotness::Cold;
+ }
+ StringRef Prefix = hotnessToStr(GlobalVarHotness);
+ LLVM_DEBUG(
+ dbgs() << GV->getName() << " has section prefix "
+ << DbgPrintPrefix(Prefix)
+ << ", the max from data access profiles as "
+ << DbgPrintPrefix(hotnessToStr(HotnessFromDataAccessProf))
+ << " and PGO counters as "
+ << DbgPrintPrefix(hotnessToStr(HotnessFromPGO)) << "\n");
+ return Prefix;
+ }
+ }
if (!Count)
return "";
- // The accummulated counter shows the constant is hot. Return 'hot' whether
- // this variable is seen by unprofiled functions or not.
- if (PSI->isHotCount(*Count))
- return "hot";
- // The constant is not hot, and seen by unprofiled functions. We don't want to
- // assign it to unlikely sections, even if the counter says 'cold'. So return
- // an empty prefix before checking whether the counter is cold.
- if (ConstantWithoutCounts.count(C))
- return "";
- // The accummulated counter shows the constant is cold. Return 'unlikely'.
- if (PSI->isColdCount(*Count))
- return "unlikely";
- // The counter says lukewarm. Return an empty prefix.
- return "";
+ return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
}
bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
- Info.reset(new StaticDataProfileInfo());
+ bool EnableDataAccessProf = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableDataAccessProf")))
+ EnableDataAccessProf = MD->getZExtValue();
+ Info.reset(new StaticDataProfileInfo(EnableDataAccessProf));
return false;
}
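
The three-way merge of the two hotness sources above is equivalent to taking the maximum over the ordering Cold < LukewarmOrUnknown < Hot. A minimal standalone model of that lattice (the enum is a local stand-in, not the LLVM header):

#include <algorithm>
#include <cassert>

enum class Hotness { Cold, LukewarmOrUnknown, Hot }; // enumerator order matters

Hotness combine(Hotness DataAccess, Hotness PGO) {
  return std::max(DataAccess, PGO); // "use the hotter one"
}

int main() {
  assert(combine(Hotness::Cold, Hotness::Hot) == Hotness::Hot);
  assert(combine(Hotness::Cold, Hotness::LukewarmOrUnknown) ==
         Hotness::LukewarmOrUnknown);
  assert(combine(Hotness::Cold, Hotness::Cold) == Hotness::Cold);
}
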
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 380b192..cf63285 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -329,10 +329,6 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
for (const auto &[Name, Info] : make_early_inc_range(ForwardRefVals)) {
if (StringRef(Name).starts_with("llvm.")) {
Intrinsic::ID IID = Intrinsic::lookupIntrinsicID(Name);
- if (IID == Intrinsic::not_intrinsic)
- // Don't do anything for unknown intrinsics.
- continue;
-
// Automatically create declarations for intrinsics. Intrinsics can only
// be called directly, so the call function type directly determines the
// declaration function type.
@@ -346,11 +342,26 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
return error(Info.second, "intrinsic can only be used as callee");
SmallVector<Type *> OverloadTys;
- if (!Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(),
- OverloadTys))
- return error(Info.second, "invalid intrinsic signature");
-
- U.set(Intrinsic::getOrInsertDeclaration(M, IID, OverloadTys));
+ if (IID != Intrinsic::not_intrinsic &&
+ Intrinsic::getIntrinsicSignature(IID, CB->getFunctionType(),
+ OverloadTys)) {
+ U.set(Intrinsic::getOrInsertDeclaration(M, IID, OverloadTys));
+ } else {
+ // Try to upgrade the intrinsic.
+ Function *TmpF = Function::Create(CB->getFunctionType(),
+ Function::ExternalLinkage, Name, M);
+ Function *NewF = nullptr;
+ if (!UpgradeIntrinsicFunction(TmpF, NewF)) {
+ if (IID == Intrinsic::not_intrinsic)
+ return error(Info.second, "unknown intrinsic '" + Name + "'");
+ return error(Info.second, "invalid intrinsic signature");
+ }
+
+ U.set(TmpF);
+ UpgradeIntrinsicCall(CB, NewF);
+ if (TmpF->use_empty())
+ TmpF->eraseFromParent();
+ }
}
Info.first->eraseFromParent();
@@ -1259,7 +1270,7 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, unsigned NameID,
if (parseToken(lltok::StringConstant, "expected partition string"))
return true;
} else if (!IsAlias && Lex.getKind() == lltok::MetadataVar) {
- if (parseGlobalObjectMetadataAttachment(*GI.get()))
+ if (parseGlobalObjectMetadataAttachment(*GI))
return true;
} else {
return tokError("unknown alias or ifunc property!");
@@ -5865,6 +5876,7 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
REQUIRED(file, MDField, (/* AllowNull */ false)); \
OPTIONAL(language, DwarfLangField, ); \
OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, ); \
+ OPTIONAL(sourceLanguageVersion, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(producer, MDStringField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(flags, MDStringField, ); \
@@ -5894,10 +5906,15 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
return error(Loc, "can only specify one of 'language' and "
"'sourceLanguageName' on !DICompileUnit");
+ if (sourceLanguageVersion.Seen && !sourceLanguageName.Seen)
+ return error(Loc, "'sourceLanguageVersion' requires an associated "
+ "'sourceLanguageName' on !DICompileUnit");
+
Result = DICompileUnit::getDistinct(
Context,
language.Seen ? DISourceLanguageName(language.Val)
- : DISourceLanguageName(sourceLanguageName.Val, 0),
+ : DISourceLanguageName(sourceLanguageName.Val,
+ sourceLanguageVersion.Val),
file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
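
The LLParser hunk above routes unknown or mismatched "llvm." names through the AutoUpgrade machinery instead of rejecting them outright. A compressed sketch of that fallback flow, assuming the UpgradeIntrinsicFunction/UpgradeIntrinsicCall signatures from llvm/IR/AutoUpgrade.h (error handling elided; not a drop-in utility):

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Declare a function with the call's type under the "llvm.*" name, let
// AutoUpgrade recognize and rewrite it, then drop the temporary
// declaration if nothing still references it.
static bool tryUpgradeCall(CallBase *CB, StringRef Name, Module &M) {
  Function *TmpF = Function::Create(CB->getFunctionType(),
                                    Function::ExternalLinkage, Name, &M);
  Function *NewF = nullptr;
  if (!UpgradeIntrinsicFunction(TmpF, NewF)) {
    TmpF->eraseFromParent();
    return false; // unknown intrinsic or invalid signature
  }
  UpgradeIntrinsicCall(CB, NewF);
  if (TmpF->use_empty())
    TmpF->eraseFromParent();
  return true;
}
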
diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp
index e0a4471..19d5b98 100644
--- a/llvm/lib/BinaryFormat/XCOFF.cpp
+++ b/llvm/lib/BinaryFormat/XCOFF.cpp
@@ -112,26 +112,26 @@ StringRef XCOFF::getNameForTracebackTableLanguageId(
XCOFF::CFileCpuId XCOFF::getCpuID(StringRef CPUName) {
StringRef CPU = PPC::normalizeCPUName(CPUName);
return StringSwitch<XCOFF::CFileCpuId>(CPU)
- .Cases("generic", "COM", XCOFF::TCPU_COM)
+ .Cases({"generic", "COM"}, XCOFF::TCPU_COM)
.Case("601", XCOFF::TCPU_601)
- .Cases("602", "603", "603e", "603ev", XCOFF::TCPU_603)
- .Cases("604", "604e", XCOFF::TCPU_604)
+ .Cases({"602", "603", "603e", "603ev"}, XCOFF::TCPU_603)
+ .Cases({"604", "604e"}, XCOFF::TCPU_604)
.Case("620", XCOFF::TCPU_620)
.Case("970", XCOFF::TCPU_970)
- .Cases("a2", "g3", "g4", "g5", "e500", XCOFF::TCPU_COM)
- .Cases("pwr3", "pwr4", XCOFF::TCPU_COM)
- .Cases("pwr5", "PWR5", XCOFF::TCPU_PWR5)
- .Cases("pwr5x", "PWR5X", XCOFF::TCPU_PWR5X)
- .Cases("pwr6", "PWR6", XCOFF::TCPU_PWR6)
- .Cases("pwr6x", "PWR6E", XCOFF::TCPU_PWR6E)
- .Cases("pwr7", "PWR7", XCOFF::TCPU_PWR7)
- .Cases("pwr8", "PWR8", XCOFF::TCPU_PWR8)
- .Cases("pwr9", "PWR9", XCOFF::TCPU_PWR9)
- .Cases("pwr10", "PWR10", XCOFF::TCPU_PWR10)
- .Cases("ppc", "PPC", "ppc32", "ppc64", XCOFF::TCPU_COM)
+ .Cases({"a2", "g3", "g4", "g5", "e500"}, XCOFF::TCPU_COM)
+ .Cases({"pwr3", "pwr4"}, XCOFF::TCPU_COM)
+ .Cases({"pwr5", "PWR5"}, XCOFF::TCPU_PWR5)
+ .Cases({"pwr5x", "PWR5X"}, XCOFF::TCPU_PWR5X)
+ .Cases({"pwr6", "PWR6"}, XCOFF::TCPU_PWR6)
+ .Cases({"pwr6x", "PWR6E"}, XCOFF::TCPU_PWR6E)
+ .Cases({"pwr7", "PWR7"}, XCOFF::TCPU_PWR7)
+ .Cases({"pwr8", "PWR8"}, XCOFF::TCPU_PWR8)
+ .Cases({"pwr9", "PWR9"}, XCOFF::TCPU_PWR9)
+ .Cases({"pwr10", "PWR10"}, XCOFF::TCPU_PWR10)
+ .Cases({"ppc", "PPC", "ppc32", "ppc64"}, XCOFF::TCPU_COM)
.Case("ppc64le", XCOFF::TCPU_PWR8)
.Case("future", XCOFF::TCPU_PWR10)
- .Cases("any", "ANY", XCOFF::TCPU_ANY)
+ .Cases({"any", "ANY"}, XCOFF::TCPU_ANY)
.Default(XCOFF::TCPU_INVALID);
}
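
This migration targets the initializer-list overload of StringSwitch::Cases, which groups the literals for one value in a single braced list instead of a long variadic argument run. A minimal usage sketch of that overload (the mapping here is made up for illustration):

#include "llvm/ADT/StringSwitch.h"

static int powerLevel(llvm::StringRef CPU) {
  return llvm::StringSwitch<int>(CPU)
      .Cases({"pwr9", "PWR9"}, 9)
      .Cases({"pwr10", "PWR10"}, 10)
      .Case("future", 10)
      .Default(0);
}
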
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index cdcf7a8..ed0443f 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1860,7 +1860,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_COMPILE_UNIT: {
- if (Record.size() < 14 || Record.size() > 22)
+ if (Record.size() < 14 || Record.size() > 23)
return error("Invalid record");
// Ignore Record[0], which indicates whether this compile unit is
@@ -1869,11 +1869,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
const auto LangVersionMask = (uint64_t(1) << 63);
const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+ const uint32_t LanguageVersion = Record.size() > 22 ? Record[22] : 0;
auto *CU = DICompileUnit::getDistinct(
Context,
HasVersionedLanguage
- ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+ ? DISourceLanguageName(Record[1] & ~LangVersionMask,
+ LanguageVersion)
: DISourceLanguageName(Record[1]),
getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
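
A standalone model of the decoding rule above (a local rewrite for illustration, not the LLVM source): bit 63 of Record[1] flags a versioned source-language name, and the version rides in the new trailing field at index 22, defaulting to 0 so bitcode written before this change still reads cleanly.

#include <cstddef>
#include <cstdint>
#include <utility>

// Returns {language code, version}. Older records stop before index 22,
// so the version silently defaults to 0.
std::pair<uint64_t, uint32_t> decodeLanguage(const uint64_t *Record,
                                             size_t Size) {
  const uint64_t LangVersionMask = uint64_t(1) << 63;
  const bool HasVersionedLanguage = Record[1] & LangVersionMask;
  const uint32_t Version =
      (HasVersionedLanguage && Size > 22) ? uint32_t(Record[22]) : 0;
  return {Record[1] & ~LangVersionMask, Version};
}
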
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 54e916e..8ff3aa9 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2142,6 +2142,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
Record.push_back(N->getRangesBaseAddress());
Record.push_back(VE.getMetadataOrNullID(N->getRawSysRoot()));
Record.push_back(VE.getMetadataOrNullID(N->getRawSDK()));
+ Record.push_back(Lang.hasVersionedName() ? Lang.getVersion() : 0);
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
index 323b21e..4e6f93e 100644
--- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -1102,8 +1102,6 @@ void TrieRawHashMapHandle::print(
if (auto Err = Printer.printRecords())
OS << "error: " << toString(std::move(Err)) << "\n";
-
- return;
}
Error TrieRawHashMapHandle::validate(
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 05fffe9..e2af0c5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,6 +119,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VCSRevision.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -476,6 +477,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool AsmPrinter::doInitialization(Module &M) {
+ VFS = vfs::getRealFileSystem();
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
HasSplitStack = false;
@@ -1683,7 +1685,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) {
return nullptr;
}
-/// Emits .callgraph section.
+/// Emits .llvm.callgraph section.
void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
FunctionCallGraphInfo &FuncCGInfo) {
if (!MF.getTarget().Options.EmitCallGraphSection)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c364ffc..8dd8b9da 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -98,6 +99,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+ SrcMgr.setVirtualFileSystem(VFS);
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 2d50167..fae952e 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -491,6 +491,20 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
return true;
}
if (FBB) {
+ // If we get here with an MBB which ends like this:
+ //
+ // bb.1:
+ // successors: %bb.2;
+ // ...
+ // BNE $x1, $x0, %bb.2
+ // PseudoBR %bb.2
+ //
+ // Just remove the conditional branch.
+ if (TBB == FBB) {
+ removeBranch(MBB);
+ insertUncondBranch(MBB, TBB);
+ return true;
+ }
// We need to split the basic block here to obtain two long-range
// unconditional branches.
NewBB = createNewBlockAfter(*MBB);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4320b1d..9e78ec9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -819,7 +819,7 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
}
// Verify BFI has been updated correctly by recomputing BFI and comparing them.
-void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
+[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
DominatorTree NewDT(F);
LoopInfo NewLI(NewDT);
BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
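
This is the first of several hunks in the change swapping LLVM_ATTRIBUTE_UNUSED for the standard C++17 attribute. A self-contained illustration of the pattern (the function here is hypothetical):

#include <cstdio>

// [[maybe_unused]] needs no Compiler.h, and as a standard attribute it
// sits at the front of the declaration.
[[maybe_unused]] static void dumpState(int V) { std::printf("state=%d\n", V); }

int main() { return 0; } // dumpState is deliberately unreferenced: no warning
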
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cffaf7c..38ec83f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3292,8 +3292,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
@@ -3325,8 +3324,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx == 2) {
Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
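
The operands widened in these two hunks appear to be vector-element indices (the operand positions match G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT), i.e. unsigned quantities, so zero-extension is the value-preserving choice. A plain C++ illustration of why sign-extension corrupts such values (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  int8_t NarrowIdx = int8_t(0x80);    // bit pattern for lane index 128
  uint32_t ZExt = uint8_t(NarrowIdx); // 128: unsigned value preserved
  int32_t SExt = NarrowIdx;           // -128: index meaning destroyed
  assert(ZExt == 128 && SExt == -128);
}
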
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index e359831..ea08365 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1257,7 +1257,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.clear();
}
-static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+[[maybe_unused]] static void printSpillReloadChain(
DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
MachineInstr *Leader) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 787a81a..c97300d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -658,13 +658,13 @@ namespace {
bool InexpensiveOnly = false,
std::optional<EVT> OutVT = std::nullopt);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
- SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+ SDValue buildRsqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -17759,7 +17759,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -17825,7 +17824,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
bool AllowNewConst = (Level < AfterLegalizeDAG);
// If nnan is enabled, fold lots of things.
- if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ if (Flags.hasNoNaNs() && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
@@ -17974,7 +17973,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -18002,7 +18000,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0 == N1) {
// (fsub x, x) -> 0.0
- if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
+ if (Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
@@ -18313,7 +18311,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
MatchContextClass matcher(DAG, TLI, N);
@@ -18339,8 +18336,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
- if ((Options.NoNaNsFPMath && N->getFlags().hasNoInfs()) ||
- (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
+ if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
if (N->getFlags().hasNoSignedZeros() ||
(N2CFP && !N2CFP->isExactlyValue(-0.0))) {
if (N0CFP && N0CFP->isZero())
@@ -18590,20 +18586,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -18635,7 +18629,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
SDValue AAZ =
DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
// Estimate creation failed. Clean up speculatively created nodes.
@@ -18645,7 +18639,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// We found a FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
- if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
@@ -18742,11 +18736,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
- return buildSqrtEstimate(N0, Flags);
+ return buildSqrtEstimate(N0);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -29743,28 +29738,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
}
// If non-reciprocal square root is requested, multiply the result by Arg.
if (!Reciprocal)
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
return Est;
}
@@ -29775,8 +29769,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -29789,9 +29782,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations for reciprocal square root:
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -29799,13 +29792,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
SDValue LHS;
if (Reciprocal || (i + 1) < Iterations) {
// RSQRT: LHS = (E * -0.5)
- LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
} else {
// SQRT: LHS = (A * E) * -0.5
- LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
}
- Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
}
return Est;
@@ -29814,8 +29807,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
- bool Reciprocal) {
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
if (LegalDAG)
return SDValue();
@@ -29843,8 +29835,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (Iterations > 0)
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target specific test first.
@@ -29862,12 +29854,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
return SDValue();
}
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, true);
}
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, false);
}
/// Return true if there is any possibility that the two addresses overlap.
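
The DAGCombiner hunks above lean on SelectionDAG::FlagInserter: while one is in scope, getNode() calls that pass no explicit flags inherit the inserter's flags, so threading an SDNodeFlags parameter through every sqrt-estimate helper becomes redundant. A compressed sketch of the RAII pattern (simplified; not a complete combine):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Both getNode() calls pick up N's flags implicitly from the inserter.
static SDValue squareTwice(SelectionDAG &DAG, SDNode *N, SDValue Op) {
  SelectionDAG::FlagInserter FlagsInserter(DAG, N->getFlags());
  EVT VT = Op.getValueType();
  SDValue Sq = DAG.getNode(ISD::FMUL, SDLoc(N), VT, Op, Op);
  return DAG.getNode(ISD::FMUL, SDLoc(N), VT, Sq, Sq);
}
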
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 6610eef..c61f757 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -181,8 +181,8 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch,
DWARFDebugFrame::~DWARFDebugFrame() = default;
-static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
- uint64_t Offset, int Length) {
+[[maybe_unused]] static void dumpDataAux(DataExtractor Data, uint64_t Offset,
+ int Length) {
errs() << "DUMP: ";
for (int i = 0; i < Length; ++i) {
uint8_t c = Data.getU8(&Offset);
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 38e9ac5..fa39603 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
if (FilePath.empty()) {
// If we had a DW_AT_decl_file, but got no file then we need to emit a
// warning.
+ const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+ Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+ // If the DIE has no DW_AT_decl_file attribute at all, don't report an
+ // error.
+ if (DwarfFileIdx == UINT32_MAX)
+ return;
Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
- const uint64_t DwarfFileIdx = dwarf::toUnsigned(
- Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
OS << "error: function DIE at " << HEX32(Die.getOffset())
<< " has an invalid file index " << DwarfFileIdx
<< " in its DW_AT_decl_file attribute, unable to create a single "
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 5b3c05e..6c7e27e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -260,22 +260,17 @@ public:
}
// Run finalization actions.
- using WrapperFunctionCall = orc::shared::WrapperFunctionCall;
- runFinalizeActions(
- G->allocActions(),
- [this, OnFinalized = std::move(OnFinalized)](
- Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable {
- completeFinalization(std::move(OnFinalized),
- std::move(DeallocActions));
- });
- }
+ auto DeallocActions = runFinalizeActions(G->allocActions());
+ if (!DeallocActions) {
+ OnFinalized(DeallocActions.takeError());
+ return;
+ }
- void abandon(OnAbandonedFunction OnAbandoned) override {
- Error Err = Error::success();
- if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
- if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
- Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ // Release the finalize segments slab.
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
+ OnFinalized(errorCodeToError(EC));
+ return;
+ }
#ifndef NDEBUG
// Set 'G' to null to flag that we've been successfully finalized.
@@ -284,22 +279,17 @@ public:
G = nullptr;
#endif
- OnAbandoned(std::move(Err));
+ // Continue with finalized allocation.
+ OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
+ std::move(*DeallocActions)));
}
-private:
- void completeFinalization(
- OnFinalizedFunction OnFinalized,
- Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) {
-
- if (!DeallocActions)
- return OnFinalized(DeallocActions.takeError());
-
- // Release the finalize segments slab.
- if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
- OnFinalized(errorCodeToError(EC));
- return;
- }
+ void abandon(OnAbandonedFunction OnAbandoned) override {
+ Error Err = Error::success();
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
#ifndef NDEBUG
// Set 'G' to null to flag that we've been successfully finalized.
@@ -308,11 +298,10 @@ private:
G = nullptr;
#endif
- // Continue with finalized allocation.
- OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
- std::move(*DeallocActions)));
+ OnAbandoned(std::move(Err));
}
+private:
Error applyProtections() {
for (auto &KV : BL.segments()) {
const auto &AG = KV.first;
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 7b327af..7e606c6a 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size);
}
- std::vector<shared::WrapperFunctionCall> DeinitializeActions;
- {
- std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
- auto F = P.get_future();
- shared::runFinalizeActions(
- AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
- P.set_value(std::move(R));
- });
- if (auto DeinitializeActionsOrErr = F.get())
- DeinitializeActions = std::move(*DeinitializeActionsOrErr);
- else
- return OnInitialized(DeinitializeActionsOrErr.takeError());
- }
+ auto DeinitializeActions = shared::runFinalizeActions(AI.Actions);
+ if (!DeinitializeActions)
+ return OnInitialized(DeinitializeActions.takeError());
{
std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
// This is the maximum range whose permission have been possibly modified
auto &Alloc = Allocations[MinAddr];
Alloc.Size = MaxAddr - MinAddr;
- Alloc.DeinitializationActions = std::move(DeinitializeActions);
+ Alloc.DeinitializationActions = std::move(*DeinitializeActions);
Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr);
}
@@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize(
for (auto Base : llvm::reverse(Bases)) {
- shared::runDeallocActions(
- Allocations[Base].DeinitializationActions, [&](Error Err) {
- AllErr = joinErrors(std::move(AllErr), std::move(Err));
- });
+ if (Error Err = shared::runDeallocActions(
+ Allocations[Base].DeinitializationActions)) {
+ AllErr = joinErrors(std::move(AllErr), std::move(Err));
+ }
// Reset protections to read/write so the area can be reused
if (auto EC = sys::Memory::protectMappedMemory(
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
index 08ab0c6..91f2899 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
@@ -12,39 +12,31 @@ namespace llvm {
namespace orc {
namespace shared {
-void runFinalizeActions(AllocActions &AAs,
- OnRunFinalizeActionsCompleteFn OnComplete) {
+Expected<std::vector<WrapperFunctionCall>>
+runFinalizeActions(AllocActions &AAs) {
std::vector<WrapperFunctionCall> DeallocActions;
DeallocActions.reserve(numDeallocActions(AAs));
for (auto &AA : AAs) {
if (AA.Finalize)
-
- if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) {
- while (!DeallocActions.empty()) {
- Err = joinErrors(std::move(Err),
- DeallocActions.back().runWithSPSRetErrorMerged());
- DeallocActions.pop_back();
- }
- return OnComplete(std::move(Err));
- }
+ if (auto Err = AA.Finalize.runWithSPSRetErrorMerged())
+ return joinErrors(std::move(Err), runDeallocActions(DeallocActions));
if (AA.Dealloc)
DeallocActions.push_back(std::move(AA.Dealloc));
}
AAs.clear();
- OnComplete(std::move(DeallocActions));
+ return DeallocActions;
}
-void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs,
- OnRunDeallocActionsComeleteFn OnComplete) {
+Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) {
Error Err = Error::success();
while (!DAs.empty()) {
Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged());
DAs = DAs.drop_back();
}
- OnComplete(std::move(Err));
+ return Err;
}
} // namespace shared
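
Call sites move from the callback form to plain Expected/Error flow. A hedged sketch of the new shape, mirroring the signatures in this hunk rather than any released header:

#include "llvm/ExecutionEngine/Orc/Shared/AllocationActions.h"
using namespace llvm;
using namespace llvm::orc::shared;

// Finalize, then deallocate; errors propagate by return value instead of
// through an OnComplete callback.
static Error finalizeAndRelease(AllocActions &AAs) {
  Expected<std::vector<WrapperFunctionCall>> DeallocActions =
      runFinalizeActions(AAs);
  if (!DeallocActions)
    return DeallocActions.takeError();
  // ... the allocation is live here ...
  return runDeallocActions(*DeallocActions);
}
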
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
index 8c24b1f..4fbf232 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -9,10 +9,8 @@
#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/WindowsError.h"
-#include <future>
#include <sstream>
#if defined(LLVM_ON_UNIX)
@@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
}
// Run finalization actions and get the deinitialization action list.
- std::vector<shared::WrapperFunctionCall> DeinitializeActions;
- {
- std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
- auto F = P.get_future();
- shared::runFinalizeActions(
- FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
- P.set_value(std::move(R));
- });
- if (auto DeinitializeActionsOrErr = F.get())
- DeinitializeActions = std::move(*DeinitializeActionsOrErr);
- else
- return DeinitializeActionsOrErr.takeError();
+ auto DeinitializeActions = shared::runFinalizeActions(FR.Actions);
+ if (!DeinitializeActions) {
+ return DeinitializeActions.takeError();
}
{
std::lock_guard<std::mutex> Lock(Mutex);
Allocations[MinAddr].DeinitializationActions =
- std::move(DeinitializeActions);
+ std::move(*DeinitializeActions);
Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr);
}
@@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize(
std::lock_guard<std::mutex> Lock(Mutex);
for (auto Base : llvm::reverse(Bases)) {
- shared::runDeallocActions(
- Allocations[Base].DeinitializationActions, [&](Error Err) {
- if (Err)
- AllErr = joinErrors(std::move(AllErr), std::move(Err));
- });
+ if (Error Err = shared::runDeallocActions(
+ Allocations[Base].DeinitializationActions)) {
+ AllErr = joinErrors(std::move(AllErr), std::move(Err));
+ }
// Remove the allocation from the allocation list of its reservation
for (auto &Reservation : Reservations) {
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 2430d98..3908a78 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2374,16 +2374,21 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Out << "!DICompileUnit(";
MDFieldPrinter Printer(Out, WriterCtx);
- auto Lang = N->getSourceLanguage();
- if (Lang.hasVersionedName())
+ DISourceLanguageName Lang = N->getSourceLanguage();
+
+ if (Lang.hasVersionedName()) {
Printer.printDwarfEnum(
"sourceLanguageName",
static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
dwarf::SourceLanguageNameString,
/* ShouldSkipZero */ false);
- else
+
+ Printer.printInt("sourceLanguageVersion", Lang.getVersion(),
+ /*ShouldSkipZero=*/true);
+ } else {
Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
/* ShouldSkipZero */ false);
+ }
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printString("producer", N->getProducer());
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index a755c22..aee3c3b 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -553,7 +553,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
SFrameSection =
Ctx->getELFSection(".sframe", ELF::SHT_GNU_SFRAME, ELF::SHF_ALLOC);
- CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
+ CallGraphSection =
+ Ctx->getELFSection(".llvm.callgraph", ELF::SHT_PROGBITS, 0);
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
@@ -1171,8 +1172,8 @@ MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
}
return Ctx->getELFSection(
- ".callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, true,
- ElfSec.getUniqueID(),
+ ".llvm.callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName,
+ /*IsComdat=*/true, ElfSec.getUniqueID(),
static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
}
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 3c8e44a..0208735 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -302,7 +302,7 @@ void ProfOStream::patch(ArrayRef<PatchItem> P) {
std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage,
StringRef FileName,
- uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] uint64_t Version) {
// Value names may be prefixed with a binary '1' to indicate
// that the backend should not modify the symbols due to any platform
// naming convention. Do not include that '1' in the PGO profile name.
diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp
index 82b0e6a..eff9947 100644
--- a/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/llvm/lib/Support/PrettyStackTrace.cpp
@@ -141,7 +141,7 @@ extern "C" const char *__crashreporter_info__
asm(".desc ___crashreporter_info__, 0x10");
#endif
-static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static void setCrashLogMessage(const char *msg);
static void setCrashLogMessage(const char *msg) {
#ifdef HAVE_CRASHREPORTERCLIENT_H
(void)CRSetCrashLogMessage(msg);
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index a43cf37a..f2bbaab 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -38,6 +39,22 @@ using namespace llvm;
static const size_t TabStop = 8;
+// Out of line to avoid needing definition of vfs::FileSystem in header.
+SourceMgr::SourceMgr() = default;
+SourceMgr::SourceMgr(IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : FS(std::move(FS)) {}
+SourceMgr::SourceMgr(SourceMgr &&) = default;
+SourceMgr &SourceMgr::operator=(SourceMgr &&) = default;
+SourceMgr::~SourceMgr() = default;
+
+IntrusiveRefCntPtr<vfs::FileSystem> SourceMgr::getVirtualFileSystem() const {
+ return FS;
+}
+
+void SourceMgr::setVirtualFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ this->FS = std::move(FS);
+}
+
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
SMLoc IncludeLoc,
std::string &IncludedFile) {
@@ -52,8 +69,11 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
ErrorOr<std::unique_ptr<MemoryBuffer>>
SourceMgr::OpenIncludeFile(const std::string &Filename,
std::string &IncludedFile) {
+ if (!FS)
+ reportFatalInternalError("Opening include file from SourceMgr without VFS");
+
ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
- MemoryBuffer::getFile(Filename);
+ FS->getBufferForFile(Filename);
SmallString<64> Buffer(Filename);
// If the file didn't exist directly, see if it's in an include path.
@@ -61,7 +81,7 @@ SourceMgr::OpenIncludeFile(const std::string &Filename,
++i) {
Buffer = IncludeDirectories[i];
sys::path::append(Buffer, Filename);
- NewBufOrErr = MemoryBuffer::getFile(Buffer);
+ NewBufOrErr = FS->getBufferForFile(Buffer);
}
if (NewBufOrErr)
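
Together with the TableGen and AsmPrinter hunks elsewhere in this change, the pattern for clients is to choose the filesystem explicitly when configuring a SourceMgr. A minimal sketch using the real-filesystem default these callers pick:

#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/VirtualFileSystem.h"
using namespace llvm;

static void configureSourceMgr(SourceMgr &SM,
                               const std::vector<std::string> &IncludeDirs) {
  SM.setIncludeDirs(IncludeDirs);
  // Include resolution now goes through the VFS; without one set, the new
  // OpenIncludeFile path reports a fatal internal error.
  SM.setVirtualFileSystem(vfs::getRealFileSystem());
}
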
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 42043f7..b1024a8 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -128,6 +129,7 @@ int llvm::TableGenMain(const char *argv0,
// Record the location of the include directory so that the lexer can find
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
+ SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem());
TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp
index 2c3726a..db45054 100644
--- a/llvm/lib/TableGen/Parser.cpp
+++ b/llvm/lib/TableGen/Parser.cpp
@@ -9,6 +9,7 @@
#include "llvm/TableGen/Parser.h"
#include "TGParser.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TableGen/Record.h"
using namespace llvm;
@@ -21,6 +22,7 @@ bool llvm::TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records) {
SrcMgr = SourceMgr();
SrcMgr.takeSourceBuffersFrom(InputSrcMgr);
SrcMgr.setIncludeDirs(InputSrcMgr.getIncludeDirs());
+ SrcMgr.setVirtualFileSystem(InputSrcMgr.getVirtualFileSystem());
SrcMgr.setDiagHandler(InputSrcMgr.getDiagHandler(),
InputSrcMgr.getDiagContext());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9926a4d..be2f2e4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16254,7 +16254,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
SplatVal > 1) {
SDValue Pg = getPredicateForScalableVector(DAG, DL, VT);
SDValue Res =
- DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
+ DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32));
if (Negated)
Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
@@ -22942,7 +22942,7 @@ static SDValue performIntrinsicCombine(SDNode *N,
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_asrd:
- return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
@@ -30047,7 +30047,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
SDValue Res =
- DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
+ DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
if (Negated)
Res = DAG.getNode(ISD::SUB, DL, ContainerVT,
DAG.getConstant(0, DL, ContainerVT), Res);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7322212..fe84193 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_USDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;
def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
+def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index b3c9656..343fd81 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -40,7 +40,11 @@ yaml::AArch64FunctionInfo::AArch64FunctionInfo(
getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
HasStackFrame(MFI.hasStackFrame()
? std::optional<bool>(MFI.hasStackFrame())
- : std::nullopt) {}
+ : std::nullopt),
+ HasStreamingModeChanges(
+ MFI.hasStreamingModeChanges()
+ ? std::optional<bool>(MFI.hasStreamingModeChanges())
+ : std::nullopt) {}
void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this);
@@ -55,6 +59,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
YamlMFI.StackSizePPR.value_or(0));
if (YamlMFI.HasStackFrame)
setHasStackFrame(*YamlMFI.HasStackFrame);
+ if (YamlMFI.HasStreamingModeChanges)
+ setHasStreamingModeChanges(*YamlMFI.HasStreamingModeChanges);
}
static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index bd0a17d..d1832f4 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -645,6 +645,7 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
std::optional<uint64_t> StackSizeZPR;
std::optional<uint64_t> StackSizePPR;
std::optional<bool> HasStackFrame;
+ std::optional<bool> HasStreamingModeChanges;
AArch64FunctionInfo() = default;
AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
@@ -659,6 +660,7 @@ template <> struct MappingTraits<AArch64FunctionInfo> {
YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
+ YamlIO.mapOptional("hasStreamingModeChanges", MFI.HasStreamingModeChanges);
}
};
diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
index cdf2822..a90950d 100644
--- a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
@@ -75,6 +75,10 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
if (Src != Dst)
MRI->replaceRegWith(Dst, Src);
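+ // If the copy's source was undef, so are its users: propagate the undef
+ // flag to the remaining uses of the destination register.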
+ if (MI.getOperand(1).isUndef())
+ for (MachineOperand &MO : MRI->use_operands(Dst))
+ MO.setIsUndef();
+
// MI must be erased from the basic block before recalculating the live
// interval.
LIS->RemoveMachineInstrFromMaps(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index bc6b931..98a128e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -265,7 +265,7 @@ def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
]>;
-def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;
+def AArch64asrd_m1 : SDNode<"AArch64ISD::ASRD_MERGE_OP1", SDT_AArch64Arith_Imm>;
def AArch64urshri_p_node : SDNode<"AArch64ISD::URSHR_I_PRED", SDT_AArch64Arith_Imm>;
def AArch64urshri_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9e2d698..05a4313 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerTriOp(AArch64::G_UDOT);
case Intrinsic::aarch64_neon_sdot:
return LowerTriOp(AArch64::G_SDOT);
+ case Intrinsic::aarch64_neon_usdot:
+ return LowerTriOp(AArch64::G_USDOT);
case Intrinsic::aarch64_neon_sqxtn:
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 4749748..434ea67 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs);
+ /// Attempts to find an insertion point before \p Inst where the status flags
+ /// are not live. If \p Inst is `Block.Insts.end()`, a point before the end of
+ /// the block is found. Returns the insertion point and the physical registers
+ /// live at that point.
+ std::pair<MachineBasicBlock::iterator, LiveRegs>
+ findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
+ SmallVectorImpl<InstInfo>::const_iterator Inst);
void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, ZAState From,
ZAState To, LiveRegs PhysLiveRegs);
@@ -337,6 +343,28 @@ private:
MachineRegisterInfo *MRI = nullptr;
};
+static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
+ LiveRegs PhysLiveRegs = LiveRegs::None;
+ if (!LiveUnits.available(AArch64::NZCV))
+ PhysLiveRegs |= LiveRegs::NZCV;
+ // We have to track W0 and X0 separately as otherwise things can get
+ // confused if we attempt to preserve X0 but only W0 was defined.
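+ // (W0_HI is the artificial register unit covering the upper 32 bits of X0.)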
+ if (!LiveUnits.available(AArch64::W0))
+ PhysLiveRegs |= LiveRegs::W0;
+ if (!LiveUnits.available(AArch64::W0_HI))
+ PhysLiveRegs |= LiveRegs::W0_HI;
+ return PhysLiveRegs;
+}
+
+static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
+ if (PhysLiveRegs & LiveRegs::NZCV)
+ LiveUnits.addReg(AArch64::NZCV);
+ if (PhysLiveRegs & LiveRegs::W0)
+ LiveUnits.addReg(AArch64::W0);
+ if (PhysLiveRegs & LiveRegs::W0_HI)
+ LiveUnits.addReg(AArch64::W0_HI);
+}
+
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
SMEFnAttrs.hasZAState()) &&
@@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
LiveRegUnits LiveUnits(*TRI);
LiveUnits.addLiveOuts(MBB);
- auto GetPhysLiveRegs = [&] {
- LiveRegs PhysLiveRegs = LiveRegs::None;
- if (!LiveUnits.available(AArch64::NZCV))
- PhysLiveRegs |= LiveRegs::NZCV;
- // We have to track W0 and X0 separately as otherwise things can get
- // confused if we attempt to preserve X0 but only W0 was defined.
- if (!LiveUnits.available(AArch64::W0))
- PhysLiveRegs |= LiveRegs::W0;
- if (!LiveUnits.available(AArch64::W0_HI))
- PhysLiveRegs |= LiveRegs::W0_HI;
- return PhysLiveRegs;
- };
-
- Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
+ Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
for (MachineInstr &MI : reverse(MBB)) {
MachineBasicBlock::iterator MBBI(MI);
LiveUnits.stepBackward(MI);
- LiveRegs PhysLiveRegs = GetPhysLiveRegs();
+ LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
// The SMEStateAllocPseudo marker is added to a function if the save
// buffer was allocated in SelectionDAG. It marks the end of the
// allocation -- which is a safe point for this pass to insert any TPIDR2
@@ -476,6 +491,49 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
return BundleStates;
}
+std::pair<MachineBasicBlock::iterator, LiveRegs>
+MachineSMEABI::findStateChangeInsertionPoint(
+ MachineBasicBlock &MBB, const BlockInfo &Block,
+ SmallVectorImpl<InstInfo>::const_iterator Inst) {
+ LiveRegs PhysLiveRegs;
+ MachineBasicBlock::iterator InsertPt;
+ if (Inst != Block.Insts.end()) {
+ InsertPt = Inst->InsertPt;
+ PhysLiveRegs = Inst->PhysLiveRegs;
+ } else {
+ InsertPt = MBB.getFirstTerminator();
+ PhysLiveRegs = Block.PhysLiveRegsAtExit;
+ }
+
+ if (!(PhysLiveRegs & LiveRegs::NZCV))
+ return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).
+
+ // Find the previous state change. We cannot move before this point.
+ MachineBasicBlock::iterator PrevStateChangeI;
+ if (Inst == Block.Insts.begin()) {
+ PrevStateChangeI = MBB.begin();
+ } else {
+ // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
+ // InstInfo object for instructions that require a specific ZA state, so the
+ // previous InstInfo marks the site of the previous state change in the
+ // block (which can be several MIs earlier).
+ PrevStateChangeI = std::prev(Inst)->InsertPt;
+ }
+
+ // Note: LiveUnits will only accurately track X0 and NZCV.
+ LiveRegUnits LiveUnits(*TRI);
+ setPhysLiveRegs(LiveUnits, PhysLiveRegs);
+ for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
+ // Don't move before/into a call (which may have a state change before it).
+ if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
+ break;
+ LiveUnits.stepBackward(*I);
+ if (LiveUnits.available(AArch64::NZCV))
+ return {I, getPhysLiveRegs(LiveUnits)};
+ }
+ return {InsertPt, PhysLiveRegs};
+}
+
void MachineSMEABI::insertStateChanges(EmitContext &Context,
const FunctionInfo &FnInfo,
const EdgeBundles &Bundles,
@@ -490,10 +548,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
CurrentState = InState;
for (auto &Inst : Block.Insts) {
- if (CurrentState != Inst.NeededState)
- emitStateChange(Context, MBB, Inst.InsertPt, CurrentState,
- Inst.NeededState, Inst.PhysLiveRegs);
- CurrentState = Inst.NeededState;
+ if (CurrentState != Inst.NeededState) {
+ auto [InsertPt, PhysLiveRegs] =
+ findStateChangeInsertionPoint(MBB, Block, &Inst);
+ emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
+ PhysLiveRegs);
+ CurrentState = Inst.NeededState;
+ }
}
if (MBB.succ_empty())
@@ -501,9 +562,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
ZAState OutState =
BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
- if (CurrentState != OutState)
- emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState,
- OutState, Block.PhysLiveRegsAtExit);
+ if (CurrentState != OutState) {
+ auto [InsertPt, PhysLiveRegs] =
+ findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
+ emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
+ PhysLiveRegs);
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index c684f9e..01a40c1 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -654,7 +654,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TotalCyclesBetweenCandidates < NumMFMACycles - 1)
InstrsToUnpack.insert(&Instr);
}
- return;
}
void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
@@ -681,7 +680,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
HiDstOp.setIsRenamable(DstOp.isRenamable());
I.eraseFromParent();
- return;
}
MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9945ecc..0d7b6d1 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -161,8 +161,8 @@ namespace {
friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
return TE.PseudoOpc < PseudoOpc;
}
- friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
- const NEONLdStTableEntry &TE) {
+ [[maybe_unused]] friend bool operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
return PseudoOpc < TE.PseudoOpc;
}
};
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 82c43ff..26a8728 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
/// Returns the bit offset to backpatch with the location of the real VST.
void DXILBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
- if (!M.getTargetTriple().empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE,
- M.getTargetTriple().str(), 0 /*TODO*/);
- const std::string &DL = M.getDataLayoutStr();
- if (!DL.empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
+ // We need to hardcode a triple and datalayout that's compatible with the
+ // historical DXIL triple and datalayout from DXC.
+ StringRef Triple = "dxil-ms-dx";
+ StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
+ "f16:32-f32:32-f64:64-n8:16:32:64";
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
if (!M.getModuleInlineAsm().empty())
writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
index 1eb03bf..725f2b1 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
@@ -149,11 +149,6 @@ public:
std::string Data;
llvm::raw_string_ostream OS(Data);
- Triple OriginalTriple = M.getTargetTriple();
- // Set to DXIL triple when write to bitcode.
- // Only the output bitcode need to be DXIL triple.
- M.setTargetTriple(Triple("dxil-ms-dx"));
-
// Perform late legalization of lifetime intrinsics that would otherwise
// fail the Module Verifier if performed in an earlier pass
legalizeLifetimeIntrinsics(M);
@@ -165,9 +160,6 @@ public:
// not-so-legal legalizations
removeLifetimeIntrinsics(M);
- // Recover triple.
- M.setTargetTriple(OriginalTriple);
-
Constant *ModuleConstant =
ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data));
auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true,
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 52e6b0b..68f5312 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -174,8 +174,8 @@ namespace {
const TargetRegisterInfo *TRI;
};
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
- LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const PrintRegSet &P);
raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
OS << '{';
for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 14b6bb3..9087f9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -272,15 +272,14 @@ namespace {
OS << *I << ' ' << **I << '\n';
}
- raw_ostream &operator<< (raw_ostream &OS,
- const NodeVect &S) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S);
raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) {
dump_node_container(OS, S);
return OS;
}
- raw_ostream &operator<< (raw_ostream &OS,
- const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const NodeToUsesMap &M);
raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
for (const auto &I : M) {
const UseSet &Us = I.second;
@@ -914,9 +913,8 @@ namespace {
const NodeToValueMap &Map;
};
- raw_ostream &operator<< (raw_ostream &OS,
- const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
- raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const LocationAsBlock &Loc) {
for (const auto &I : Loc.Map) {
OS << I.first << " -> ";
if (BasicBlock *B = cast_or_null<BasicBlock>(I.second))
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 14a7ae7..3900aac 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -132,8 +132,7 @@ namespace {
const TargetRegisterInfo &TRI;
friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
};
- raw_ostream &operator<<(raw_ostream &OS,
- const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P);
raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
<< ", PredR:" << printReg(P.FP.PredR, &P.TRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index f9fdab4..9c81e96 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -51,11 +51,11 @@ private:
const TargetRegisterInfo &TRI;
};
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
- LLVM_ATTRIBUTE_UNUSED;
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
- return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
- }
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const PrintRegister &PR);
+raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+ return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
+}
class HexagonGenPredicate : public MachineFunctionPass {
public:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index bfea50e..6b48a21 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) {
return new HexagonTargetStreamer(S);
}
-static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) {
if (STI->hasFeature(F))
STI->ToggleFeature(F);
}
-static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) {
return STI->hasFeature(F);
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 944a1e2..8bf0d11 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
return SDV;
}
+ // Recognize build vector patterns to emit VSX vector instructions
+ // instead of loading value from memory.
+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
+ return VecPat;
}
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
@@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N,
return SDValue();
}
+// The LXVKQ instruction loads a VSX vector with a special quadword value
+// selected by an immediate operand. This helper returns the details of a
+// match as a tuple of {LXVKQ unsigned IMM value, right_shift_amount}, used
+// to generate the LXVKQ instruction and the subsequent shift instruction
+// required to reproduce the original build vector pattern.
+
+// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
+using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
+
+static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
+
+ // LXVKQ instruction loads the Quadword value:
+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
+ static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
+ static const uint32_t Uim = 16;
+
+ // Check for direct LXVKQ match (no shift needed)
+ if (FullVal == BasePattern)
+ return std::make_tuple(Uim, uint8_t{0});
+
+ // Check if FullVal is 1 (the result of the base pattern >> 127).
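+ // For example, a v2i64 build vector {1, 0} on little-endian assembles to
+ // the 128-bit value 1, i.e. the LXVKQ pattern logically shifted right by
+ // 127.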
+ if (FullVal == APInt(128, 1))
+ return std::make_tuple(Uim, uint8_t{127});
+
+ return std::nullopt;
+}
+
+/// Combine a constant build vector into a single load (using lxvkq) or a
+/// splat plus shift (xxspltib + vsrq) by recognizing special patterns in the
+/// build vector. The LXVKQ instruction loads a VSX vector with a special
+/// quadword value selected by an immediate operand: if UIM=0b10000, LXVKQ
+/// loads VSR[32×TX+T] with the value
+/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
+/// This can be used to materialize build vector constants that have the
+/// following patterns:
+///
+/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
+/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
+///
+/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
+/// is produced by a combination of splat and right shift instructions.
+
+SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
+ "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
+
+ // This transformation is only supported for vectors of byte, halfword,
+ // word, or doubleword elements.
+ EVT VT = Op.getValueType();
+ if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64))
+ return SDValue();
+
+ LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
+ << VT.getEVTString() << "): ";
+ Op->dump());
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ElemBits = VT.getScalarSizeInBits();
+
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
+
+ // Check for Non-constant operand in the build vector.
+ for (const SDValue &Operand : Op.getNode()->op_values()) {
+ if (!isa<ConstantSDNode>(Operand))
+ return SDValue();
+ }
+
+ // Assemble the build vector operands into a 128-bit register value:
+ // reconstruct the 128-bit register pattern that produces this vector when
+ // interpreted with the current endianness.
+ APInt FullVal = APInt::getZero(128);
+
+ for (unsigned Index = 0; Index < NumElems; ++Index) {
+ auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
+
+ // Get element value as raw bits (zero-extended)
+ uint64_t ElemValue = C->getZExtValue();
+
+ // Mask to element size to ensure we only get the relevant bits
+ if (ElemBits < 64)
+ ElemValue &= ((1ULL << ElemBits) - 1);
+
+ // Calculate bit position for this element in the 128-bit register
+ unsigned BitPos =
+ (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
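+ // (For v4i32 on little-endian, element 1 lands at bits [63:32]; on
+ // big-endian, element 0 lands at bits [127:96].)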
+
+ // Create APInt for the element value and shift it to correct position
+ APInt ElemAPInt(128, ElemValue);
+ ElemAPInt <<= BitPos;
+
+ // Place the element value at the correct bit position
+ FullVal |= ElemAPInt;
+ }
+
+ if (FullVal.isZero() || FullVal.isAllOnes())
+ return SDValue();
+
+ if (auto UIMOpt = getPatternInfo(FullVal)) {
+ const auto &[Uim, ShiftAmount] = *UIMOpt;
+ SDLoc Dl(Op);
+
+ // Generate LXVKQ instruction if the shift amount is zero.
+ if (ShiftAmount == 0) {
+ SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
+ SDValue LxvkqInstr =
+ SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "combineBVLoadsSpecialValue: Instruction Emitted ";
+ LxvkqInstr.dump());
+ return LxvkqInstr;
+ }
+
+ assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
+
+ // The right-shifted pattern can be constructed using a combination of the
+ // XXSPLTIB and VSRQ instructions. VSRQ takes the shift amount from the
+ // lower 7 bits of byte 15, which can be provided with XXSPLTIB and an
+ // immediate value of 255.
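+ // (Splatting 0xFF into every byte leaves 0x7F = 127 in those bits.)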
+ SDValue ShiftAmountVec =
+ SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
+ DAG.getTargetConstant(255, Dl, MVT::i32)),
+ 0);
+ // Generate appropriate right shift instruction
+ SDValue ShiftVec = SDValue(
+ DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
+ 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
+ ShiftVec.dump());
+ return ShiftVec;
+ }
+ // No supported pattern matched for this build vector.
+ return SDValue();
+}
+
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 59f3387..880aca7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1472,6 +1472,9 @@ namespace llvm {
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
+ SDValue combineBVLoadsSpecialValue(SDValue Operand,
+ SelectionDAG &DAG) const;
+
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
/// essentially any shuffle of v8i16 vectors that just inserts one element
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 2384959..2d8c633 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> {
126>;
}
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor
+// This class matches the equivalent ternary operation A ? f(B,C) : NOR(B,C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C),
+// NOT(B), NAND(B,C)}
+// - NOR(B,C) is the "false" case op
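+//
+// Note on the immediate values: the 8-bit xxeval IMM is the truth table of
+// the ternary function f, where bit (4*a + 2*b + c) of IMM (numbered from
+// the most significant bit, as in the ISA) holds f(a,b,c). For example, for
+// A ? AND(B,C) : NOR(B,C) only f(0,0,0) and f(1,1,1) are 1, giving
+// IMM = 0b1000_0001 = 129.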
+// =============================================================================
+multiclass XXEvalTernarySelectNor<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 129>;
+
+ // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)), 131>;
+
+ // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)),
+ 133>;
+
+ // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 134>;
+
+ // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 138>;
+
+ // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)),
+ 140>;
+
+ // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 142>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv
+// This class matches the equivalent ternary operation A ? f(B,C) : EQV(B,C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NAND(B,C),
+// NOT(C)}
+// - EQV(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectEqv<ValueType Vt>{
+ // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 151>;
+
+ // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 152>;
+
+ // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 154>;
+
+ // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 158>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC
+// This class matches the equivalent ternary operation A ? f(B,C) : NOT(C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C),
+// NAND(B,C), B, NOT(B)}
+// - NOT(C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotC<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>;
+
+ // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>;
+
+ // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>;
+
+ // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>;
+
+ // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>;
+
+ // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB
+// This class matches the equivalent ternary operation A ? f(B,C) : NOT(B)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C),
+// NAND(B,C), C, NOT(C)}
+// - NOT(B) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotB<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>;
+
+ // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>;
+
+ // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>;
+
+ // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>;
+
+ // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>;
+
+ // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand
+// This class matches the equivalent ternary operation A ? f(B,C) : NAND(B,C)
+// and emits the corresponding xxeval instruction with the immediate value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)}
+// - NAND(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNand<ValueType Vt>{
+ // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>;
+
+ // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>;
+
+ // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 230>;
+
+ // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 231>;
+
+ // Pattern: (A ? EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 233>;
+}
+
let Predicates = [PrefixInstrs, HasP10Vector] in {
let AddedComplexity = 400 in {
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
@@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
defm : XXEvalTernarySelectC<Ty>;
defm : XXEvalTernarySelectXor<Ty>;
defm : XXEvalTernarySelectOr<Ty>;
+ defm : XXEvalTernarySelectNor<Ty>;
+ defm : XXEvalTernarySelectEqv<Ty>;
+ defm : XXEvalTernarySelectNotC<Ty>;
+ defm : XXEvalTernarySelectNotB<Ty>;
+ defm : XXEvalTernarySelectNand<Ty>;
}
// Anonymous patterns to select prefixed VSX loads and stores.
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 8851a0f..e857b2d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) {
return StringSwitch<bool>(Format)
- .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true)
- .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj",
+ .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true)
+ .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"},
STI.hasFeature(RISCV::FeatureStdExtZca))
- .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es",
+ .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"},
!STI.hasFeature(RISCV::Feature64Bit))
.Default(false);
}
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 50730c6..ab93bba 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL";
bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) {
// Return true if not one of the valid LMUL strings
return StringSwitch<bool>(Data)
- .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true)
+ .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true)
.Default(false);
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f863392a..637d61fe 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can
// be one or two VA (Vector Arithmetic).
-multiclass SiFive7ProcResources<bit extraVALU = false> {
+multiclass SiFive7ProcResources<bit dualVALU = false> {
let BufferSize = 0 in {
def PipeA : ProcResource<1>;
def PipeB : ProcResource<1>;
@@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
def FDiv : ProcResource<1>; // FP Division/Sqrt
// Arithmetic sequencer(s)
- if extraVALU then {
+ if dualVALU then {
// VA1 can handle any vector arithmetic instruction.
def VA1 : ProcResource<1>;
// VA2 generally can only handle simple vector arithmetic.
@@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"),
!cast<ProcResource>(NAME#"PipeB")]>;
- if extraVALU then
+ if dualVALU then
def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"),
!cast<ProcResource>(NAME#"VA2")]>;
}
@@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance {
/// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and
/// (2) WriteRes entries. It's parameterized by config values that will
/// eventually be supplied by different SchedMachineModels.
-multiclass SiFive7SchedResources<int vlen, bit extraVALU,
+multiclass SiFive7SchedResources<int vlen, bit dualVALU,
SiFive7FPLatencies fpLatencies,
bit hasFastGather> {
- defm SiFive7 : SiFive7ProcResources<extraVALU>;
+ defm SiFive7 : SiFive7ProcResources<dualVALU>;
// Pull out defs from SiFive7ProcResources so we can refer to them by name.
defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA);
@@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv);
defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv);
// Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU.
- defvar SiFive7VA1 = !if (extraVALU,
+ defvar SiFive7VA1 = !if (dualVALU,
!cast<ProcResource>(NAME # SiFive7VA1),
!cast<ProcResource>(NAME # SiFive7VA));
- defvar SiFive7VA1OrVA2 = !if (extraVALU,
+ defvar SiFive7VA1OrVA2 = !if (dualVALU,
!cast<ProcResGroup>(NAME # SiFive7VA1OrVA2),
!cast<ProcResource>(NAME # SiFive7VA));
defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA);
@@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
HasStdExtZkr];
int VLEN = vlen;
- bit HasExtraVALU = false;
+ bit HasDualVALU = false;
SiFive7FPLatencies FPLatencies;
bit HasFastGather = false;
@@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
}
def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
- let HasExtraVALU = true;
+ let HasDualVALU = true;
let FPLatencies = SiFive7LowFPLatencies;
let HasFastGather = true;
}
@@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
/// Binding models to their scheduling resources.
foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
let SchedModel = model in
- defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
+ defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU,
model.FPLatencies,
model.HasFastGather>;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index a466ab2..a0cff4d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform(
SPIRV::Decoration::NonUniformEXT, {});
}
}
- return;
}
bool SPIRVInstructionSelector::extractSubvector(
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 2934c88..fa08d44 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
}
}
-static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
-{
+[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) {
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
if (MRI->isPhysRegUsed(reg))
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
index d9c8e22..6e99fc3 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
@@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
.Case("i64", wasm::ValType::I64)
.Case("f32", wasm::ValType::F32)
.Case("f64", wasm::ValType::F64)
- .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2",
+ .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"},
wasm::ValType::V128)
.Case("funcref", wasm::ValType::FUNCREF)
.Case("externref", wasm::ValType::EXTERNREF)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc..f973949 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
MachineOperand Src = MI.getOperand(3);
MachineOperand Len = MI.getOperand(4);
+ // If the length is a constant, we don't need the runtime zero-length check.
+ if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+ if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+ Def->getOpcode() == WebAssembly::CONST_I64) {
+ if (Def->getOperand(1).getImm() == 0) {
+ // A zero-length memcpy is a no-op.
+ MI.eraseFromParent();
+ return BB;
+ }
+ // A non-zero-length memcpy doesn't need a zero check.
+ unsigned MemoryCopy =
+ Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
+ BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
+ .add(DstMem)
+ .add(SrcMem)
+ .add(Dst)
+ .add(Src)
+ .add(Len);
+ MI.eraseFromParent();
+ return BB;
+ }
+ }
+
// We're going to add an extra use to `Len` to test if it's zero; that
// use shouldn't be a kill, even if the original use is.
MachineOperand NoKillLen = Len;
@@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
MachineOperand Val = MI.getOperand(2);
MachineOperand Len = MI.getOperand(3);
+ // If the length is a constant, we don't need the runtime zero-length check.
+ if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+ if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+ Def->getOpcode() == WebAssembly::CONST_I64) {
+ if (Def->getOperand(1).getImm() == 0) {
+ // A zero-length memset is a no-op.
+ MI.eraseFromParent();
+ return BB;
+ }
+ // A non-zero-length memset doesn't need a zero check.
+ unsigned MemoryFill =
+ Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
+ BuildMI(*BB, MI, DL, TII.get(MemoryFill))
+ .add(Mem)
+ .add(Dst)
+ .add(Val)
+ .add(Len);
+ MI.eraseFromParent();
+ return BB;
+ }
+ }
+
// We're going to add an extra use to `Len` to test if it's zero; that
// use shouldn't be a kill, even if the original use is.
MachineOperand NoKillLen = Len;
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a8908d4..ac251fd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
// xacquire <insn> ; xacquire must be accompanied by 'lock'
bool IsPrefix =
StringSwitch<bool>(Name)
- .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
- .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
- .Cases("xacquire", "xrelease", true)
- .Cases("acquire", "release", isParsingIntelSyntax())
+ .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true)
+ .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true)
+ .Cases({"xacquire", "xrelease"}, true)
+ .Cases({"acquire", "release"}, isParsingIntelSyntax())
.Default(false);
auto isLockRepeatNtPrefix = [](StringRef N) {
return StringSwitch<bool>(N)
- .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
+ .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"},
+ true)
.Default(false);
};
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 2bf016a..6db780f 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1338,7 +1338,6 @@ def ProcessorFeatures {
list<SubtargetFeature> PTLFeatures =
!listremove(ARLSFeatures, [FeatureWIDEKL]);
-
// Clearwaterforest
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
FeatureAVXVNNIINT16,
@@ -1880,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel,
}
def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
ProcessorFeatures.ADLTuning>;
-def : ProcModel<"pantherlake", AlderlakePModel,
+foreach P = ["pantherlake", "wildcatlake"] in {
+def : ProcModel<P, AlderlakePModel,
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
+}
def : ProcModel<"clearwaterforest", AlderlakePModel,
ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e0991aa..9f88fda 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -602,8 +602,7 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
- const TableEntry &TE) {
+ [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
return V < TE.from;
}
};
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c32b1a6..a0b64ff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58342,11 +58342,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
} else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
SDValue Src = Op1;
SDValue Op10 = Op1.getOperand(0);
- if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) {
- // res, flags2 = sub 0, (and (xor X, -1), Y)
+ if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) &&
+ llvm::isOneConstant(Op1.getOperand(1))) {
+ // res, flags2 = sub 0, (and (xor X, -1), 1)
// cload/cstore ..., cond_ne, flag2
// ->
- // res, flags2 = sub 0, (and X, Y)
+ // res, flags2 = sub 0, (and X, 1)
// cload/cstore ..., cond_e, flag2
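+ // Stripping the NOT is only valid for a single-bit mask: (~X & 1) != 0
+ // iff (X & 1) == 0, which is why the condition code flips (ne -> e).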
Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0),
Op1.getOperand(1));
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 0fd44b7..ec31675 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+
+// If the globaladdr is an absolute_symbol, don't bother using the
+// sign-extending instruction since there's no benefit to using it with
+// absolute symbols.
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{
+ auto *GA = cast<GlobalAddressSDNode>(N);
+ return !GA->getGlobal()->getAbsoluteSymbolRange();
+}]>;
+def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)),
+ (MOV64ri32 tglobaladdr:$dst)>,
+ Requires<[KernelCode]>;
+
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper mcsym:$dst)),
diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index 89d5e0d..f6cea85 100644
--- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -22,13 +22,13 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v5e", "v5te")
.Case("v6j", "v6")
.Case("v6hl", "v6k")
- .Cases("v6m", "v6sm", "v6s-m", "v6-m")
- .Cases("v6z", "v6zk", "v6kz")
- .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
+ .Cases({"v6m", "v6sm", "v6s-m"}, "v6-m")
+ .Cases({"v6z", "v6zk"}, "v6kz")
+ .Cases({"v7", "v7a", "v7hl", "v7l"}, "v7-a")
.Case("v7r", "v7-r")
.Case("v7m", "v7-m")
.Case("v7em", "v7e-m")
- .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
+ .Cases({"v8", "v8a", "v8l", "aarch64", "arm64"}, "v8-a")
.Case("v8.1a", "v8.1-a")
.Case("v8.2a", "v8.2-a")
.Case("v8.3a", "v8.3-a")
@@ -39,7 +39,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8.8a", "v8.8-a")
.Case("v8.9a", "v8.9-a")
.Case("v8r", "v8-r")
- .Cases("v9", "v9a", "v9-a")
+ .Cases({"v9", "v9a"}, "v9-a")
.Case("v9.1a", "v9.1-a")
.Case("v9.2a", "v9.2-a")
.Case("v9.3a", "v9.3-a")
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index a5bdc9d..3479106 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -70,8 +70,8 @@
using namespace llvm;
-static std::unique_ptr<llvm::MemoryBuffer>
- LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
+[[maybe_unused]] static std::unique_ptr<llvm::MemoryBuffer>
+getProcCpuinfoContent() {
const char *CPUInfoFile = "/proc/cpuinfo";
if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO"))
CPUInfoFile = CpuinfoIntercept;
@@ -964,6 +964,13 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
*Subtype = X86::INTEL_COREI7_PANTHERLAKE;
break;
+ // Wildcatlake:
+ case 0xd5:
+ CPU = "wildcatlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
+ break;
+
// Graniterapids:
case 0xad:
CPU = "graniterapids";
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index 950bb2b..d765d9c 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -548,8 +548,11 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
case Triple::csky:
return computeCSKYDataLayout(*this);
case Triple::dxil:
+ // TODO: We need to align vectors on the element size generally, but for now
+ // we hard code this for 3-element 32- and 64-bit vectors as a workaround.
+ // See https://github.com/llvm/llvm-project/issues/123968
return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
- "f32:32-f64:64-n8:16:32:64";
+ "f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64";
case Triple::hexagon:
return "e-m:e-p:32:32:32-a:0-n16:32-"
"i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index f021094..1068ce4 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -579,87 +579,89 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
}
static Triple::ArchType parseArch(StringRef ArchName) {
- auto AT = StringSwitch<Triple::ArchType>(ArchName)
- .Cases("i386", "i486", "i586", "i686", Triple::x86)
- // FIXME: Do we need to support these?
- .Cases("i786", "i886", "i986", Triple::x86)
- .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
- .Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc)
- .Cases("powerpcle", "ppcle", "ppc32le", Triple::ppcle)
- .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64)
- .Cases("powerpc64le", "ppc64le", Triple::ppc64le)
- .Case("xscale", Triple::arm)
- .Case("xscaleeb", Triple::armeb)
- .Case("aarch64", Triple::aarch64)
- .Case("aarch64_be", Triple::aarch64_be)
- .Case("aarch64_32", Triple::aarch64_32)
- .Case("arc", Triple::arc)
- .Case("arm64", Triple::aarch64)
- .Case("arm64_32", Triple::aarch64_32)
- .Case("arm64e", Triple::aarch64)
- .Case("arm64ec", Triple::aarch64)
- .Case("arm", Triple::arm)
- .Case("armeb", Triple::armeb)
- .Case("thumb", Triple::thumb)
- .Case("thumbeb", Triple::thumbeb)
- .Case("avr", Triple::avr)
- .Case("m68k", Triple::m68k)
- .Case("msp430", Triple::msp430)
- .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6",
- "mipsr6", Triple::mips)
- .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el",
- Triple::mipsel)
- .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6",
- "mips64r6", "mipsn32r6", Triple::mips64)
- .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
- "mipsn32r6el", Triple::mips64el)
- .Case("r600", Triple::r600)
- .Case("amdgcn", Triple::amdgcn)
- .Case("riscv32", Triple::riscv32)
- .Case("riscv64", Triple::riscv64)
- .Case("riscv32be", Triple::riscv32be)
- .Case("riscv64be", Triple::riscv64be)
- .Case("hexagon", Triple::hexagon)
- .Cases("s390x", "systemz", Triple::systemz)
- .Case("sparc", Triple::sparc)
- .Case("sparcel", Triple::sparcel)
- .Cases("sparcv9", "sparc64", Triple::sparcv9)
- .Case("tce", Triple::tce)
- .Case("tcele", Triple::tcele)
- .Case("xcore", Triple::xcore)
- .Case("nvptx", Triple::nvptx)
- .Case("nvptx64", Triple::nvptx64)
- .Case("amdil", Triple::amdil)
- .Case("amdil64", Triple::amdil64)
- .Case("hsail", Triple::hsail)
- .Case("hsail64", Triple::hsail64)
- .Case("spir", Triple::spir)
- .Case("spir64", Triple::spir64)
- .Cases("spirv", "spirv1.5", "spirv1.6", Triple::spirv)
- .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
- "spirv32v1.3", "spirv32v1.4", "spirv32v1.5",
- "spirv32v1.6", Triple::spirv32)
- .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
- "spirv64v1.3", "spirv64v1.4", "spirv64v1.5",
- "spirv64v1.6", Triple::spirv64)
- .StartsWith("kalimba", Triple::kalimba)
- .Case("lanai", Triple::lanai)
- .Case("renderscript32", Triple::renderscript32)
- .Case("renderscript64", Triple::renderscript64)
- .Case("shave", Triple::shave)
- .Case("ve", Triple::ve)
- .Case("wasm32", Triple::wasm32)
- .Case("wasm64", Triple::wasm64)
- .Case("csky", Triple::csky)
- .Case("loongarch32", Triple::loongarch32)
- .Case("loongarch64", Triple::loongarch64)
- .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
- "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
- "dxilv1.8", Triple::dxil)
- // Note: Cases has max limit of 10.
- .Case("dxilv1.9", Triple::dxil)
- .Case("xtensa", Triple::xtensa)
- .Default(Triple::UnknownArch);
+ auto AT =
+ StringSwitch<Triple::ArchType>(ArchName)
+ .Cases({"i386", "i486", "i586", "i686"}, Triple::x86)
+ // FIXME: Do we need to support these?
+ .Cases({"i786", "i886", "i986"}, Triple::x86)
+ .Cases({"amd64", "x86_64", "x86_64h"}, Triple::x86_64)
+ .Cases({"powerpc", "powerpcspe", "ppc", "ppc32"}, Triple::ppc)
+ .Cases({"powerpcle", "ppcle", "ppc32le"}, Triple::ppcle)
+ .Cases({"powerpc64", "ppu", "ppc64"}, Triple::ppc64)
+ .Cases({"powerpc64le", "ppc64le"}, Triple::ppc64le)
+ .Case("xscale", Triple::arm)
+ .Case("xscaleeb", Triple::armeb)
+ .Case("aarch64", Triple::aarch64)
+ .Case("aarch64_be", Triple::aarch64_be)
+ .Case("aarch64_32", Triple::aarch64_32)
+ .Case("arc", Triple::arc)
+ .Case("arm64", Triple::aarch64)
+ .Case("arm64_32", Triple::aarch64_32)
+ .Case("arm64e", Triple::aarch64)
+ .Case("arm64ec", Triple::aarch64)
+ .Case("arm", Triple::arm)
+ .Case("armeb", Triple::armeb)
+ .Case("thumb", Triple::thumb)
+ .Case("thumbeb", Triple::thumbeb)
+ .Case("avr", Triple::avr)
+ .Case("m68k", Triple::m68k)
+ .Case("msp430", Triple::msp430)
+ .Cases({"mips", "mipseb", "mipsallegrex", "mipsisa32r6", "mipsr6"},
+ Triple::mips)
+ .Cases({"mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el"},
+ Triple::mipsel)
+ .Cases({"mips64", "mips64eb", "mipsn32", "mipsisa64r6", "mips64r6",
+ "mipsn32r6"},
+ Triple::mips64)
+ .Cases({"mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
+ "mipsn32r6el"},
+ Triple::mips64el)
+ .Case("r600", Triple::r600)
+ .Case("amdgcn", Triple::amdgcn)
+ .Case("riscv32", Triple::riscv32)
+ .Case("riscv64", Triple::riscv64)
+ .Case("riscv32be", Triple::riscv32be)
+ .Case("riscv64be", Triple::riscv64be)
+ .Case("hexagon", Triple::hexagon)
+ .Cases({"s390x", "systemz"}, Triple::systemz)
+ .Case("sparc", Triple::sparc)
+ .Case("sparcel", Triple::sparcel)
+ .Cases({"sparcv9", "sparc64"}, Triple::sparcv9)
+ .Case("tce", Triple::tce)
+ .Case("tcele", Triple::tcele)
+ .Case("xcore", Triple::xcore)
+ .Case("nvptx", Triple::nvptx)
+ .Case("nvptx64", Triple::nvptx64)
+ .Case("amdil", Triple::amdil)
+ .Case("amdil64", Triple::amdil64)
+ .Case("hsail", Triple::hsail)
+ .Case("hsail64", Triple::hsail64)
+ .Case("spir", Triple::spir)
+ .Case("spir64", Triple::spir64)
+ .Cases({"spirv", "spirv1.5", "spirv1.6"}, Triple::spirv)
+ .Cases({"spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
+ "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", "spirv32v1.6"},
+ Triple::spirv32)
+ .Cases({"spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
+ "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", "spirv64v1.6"},
+ Triple::spirv64)
+ .StartsWith("kalimba", Triple::kalimba)
+ .Case("lanai", Triple::lanai)
+ .Case("renderscript32", Triple::renderscript32)
+ .Case("renderscript64", Triple::renderscript64)
+ .Case("shave", Triple::shave)
+ .Case("ve", Triple::ve)
+ .Case("wasm32", Triple::wasm32)
+ .Case("wasm64", Triple::wasm64)
+ .Case("csky", Triple::csky)
+ .Case("loongarch32", Triple::loongarch32)
+ .Case("loongarch64", Triple::loongarch64)
+ .Cases({"dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
+ "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8",
+ "dxilv1.9"},
+ Triple::dxil)
+ .Case("xtensa", Triple::xtensa)
+ .Default(Triple::UnknownArch);
// Some architectures require special parsing logic just to compute the
// ArchType result.
@@ -1071,7 +1073,7 @@ Triple::Triple(std::string &&Str) : Data(std::move(Str)) {
.StartsWith("mips64", Triple::GNUABI64)
.StartsWith("mipsisa64", Triple::GNUABI64)
.StartsWith("mipsisa32", Triple::GNU)
- .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU)
+ .Cases({"mips", "mipsel", "mipsr6", "mipsr6el"}, Triple::GNU)
.Default(UnknownEnvironment);
}
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 1932a3a..e382cfe 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -378,6 +378,7 @@ constexpr ProcInfo Processors[] = {
{ {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Pantherlake microarchitecture based processors.
{ {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
+ { {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 7c78eb3..444b390 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -396,9 +396,8 @@ class CHR {
} // end anonymous namespace
-static inline
-raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
- const CHRStats &Stats) {
+[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS,
+ const CHRStats &Stats) {
Stats.print(OS);
return OS;
}
@@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
return PSI.isFunctionEntryHot(&F);
}
-static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
- CHRStats *Stats) {
+[[maybe_unused]] static void dumpIR(Function &F, const char *Label,
+ CHRStats *Stats) {
StringRef FuncName = F.getName();
StringRef ModuleName = F.getParent()->getName();
(void)(FuncName); // Unused in release build.
@@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
}
// Assert that all the CHR regions of the scope have a biased branch or select.
-static void LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]] static void
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
#ifndef NDEBUG
auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
@@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
// Assert that all the condition values of the biased branches and selects have
// been hoisted to the pre-entry block or outside of the scope.
-static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
- CHRScope *Scope, BasicBlock *PreEntryBlock) {
+[[maybe_unused]] static void
+assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+ BasicBlock *PreEntryBlock) {
CHR_DEBUG(dbgs() << "Biased regions condition values \n");
for (RegInfo &RI : Scope->CHRRegions) {
Region *R = RI.R;
@@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
}
}
-static void LLVM_ATTRIBUTE_UNUSED
-dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes,
+ const char *Label) {
dbgs() << Label << " " << Scopes.size() << "\n";
for (CHRScope *Scope : Scopes) {
dbgs() << *Scope << "\n";
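
Every ControlHeightReduction hunk above is the same mechanical substitution: the LLVM_ATTRIBUTE_UNUSED macro moves off the return type and becomes the standard C++17 [[maybe_unused]] attribute at the front of the declaration. A minimal sketch of the pattern on a hypothetical debug-only helper:

    #include <cstdio>

    // [[maybe_unused]] suppresses unused-function warnings in builds
    // where every caller is compiled out; the helper itself is made up.
    [[maybe_unused]] static void dumpCountSketch(unsigned N) {
      std::printf("count = %u\n", N);
    }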
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 09db464..386e48f 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS,
- BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h
index 232db2b..5cc4212 100644
--- a/llvm/lib/Transforms/ObjCARC/PtrState.h
+++ b/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -47,8 +47,7 @@ enum Sequence {
S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
};
-raw_ostream &operator<<(raw_ostream &OS,
- const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S);
/// Unidirectional information about either a
/// retain-decrement-use-release sequence or release-use-decrement-retain
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 7ad710d..6141b6d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -77,6 +77,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
return nullptr;
}
-namespace {
// Returns true if \p I is an intrinsic that does not read or write memory.
-bool isNoopIntrinsic(Instruction *I) {
+static bool isNoopIntrinsic(Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
@@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
@@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return false;
}
+namespace {
+
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
@@ -889,23 +891,25 @@ struct MemoryDefWrapper {
SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};
-bool hasInitializesAttr(Instruction *I) {
- CallBase *CB = dyn_cast<CallBase>(I);
- return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
-}
-
struct ArgumentInitInfo {
unsigned Idx;
bool IsDeadOrInvisibleOnUnwind;
ConstantRangeList Inits;
};
+} // namespace
+
+static bool hasInitializesAttr(Instruction *I) {
+ CallBase *CB = dyn_cast<CallBase>(I);
+ return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
+}
// Return the intersected range list of the initializes attributes of "Args".
// "Args" are call arguments that alias to each other.
// If any argument in "Args" doesn't have dead_on_unwind attr and
// "CallHasNoUnwindAttr" is false, return empty.
-ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
- bool CallHasNoUnwindAttr) {
+static ConstantRangeList
+getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
+ bool CallHasNoUnwindAttr) {
if (Args.empty())
return {};
@@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
return IntersectedIntervals;
}
+namespace {
+
struct DSEState {
Function &F;
AliasAnalysis &AA;
@@ -2328,10 +2334,11 @@ struct DSEState {
// change state: whether any change was made.
bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
};
+} // namespace
// Return true if "Arg" is function local and isn't captured before "CB".
-bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
- EarliestEscapeAnalysis &EA) {
+static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
+ EarliestEscapeAnalysis &EA) {
const Value *UnderlyingObj = getUnderlyingObject(Arg);
return isIdentifiedFunctionLocal(UnderlyingObj) &&
capturesNothing(
@@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
return MadeChange;
}
-} // end anonymous namespace
//===----------------------------------------------------------------------===//
// DSE Pass
@@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
-namespace llvm {
-LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
return new DSELegacyPass();
}
-} // namespace llvm
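
The DeadStoreElimination hunks shrink the anonymous namespace until it wraps only type definitions (MemoryLocationWrapper, ArgumentInitInfo, DSEState); free functions gain internal linkage through static instead, per the LLVM coding standard. A minimal sketch of the resulting file layout, with illustrative names:

    namespace {
    // Types keep the anonymous namespace.
    struct WrapperSketch {
      int Value;
    };
    } // namespace

    // Free functions become file-local via `static` rather than by
    // living inside the anonymous namespace.
    static int helperSketch(const WrapperSketch &W) { return W.Value + 1; }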
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 1c88532..b9534def 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -73,24 +73,17 @@
#include <utility>
using namespace llvm;
+using namespace llvm::GVNExpression;
#define DEBUG_TYPE "gvn-sink"
STATISTIC(NumRemoved, "Number of instructions removed");
-namespace llvm {
-namespace GVNExpression {
-
LLVM_DUMP_METHOD void Expression::dump() const {
print(dbgs());
dbgs() << "\n";
}
-} // end namespace GVNExpression
-} // end namespace llvm
-
-namespace {
-
static bool isMemoryInst(const Instruction *I) {
return isa<LoadInst>(I) || isa<StoreInst>(I) ||
(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
@@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) {
//===----------------------------------------------------------------------===//
+namespace {
+
/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
@@ -125,14 +120,6 @@ struct SinkingInstructionCandidate {
}
};
-#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
- OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
- << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
- return OS;
-}
-#endif
-
//===----------------------------------------------------------------------===//
/// Describes a PHI node that may or may not exist. These track the PHIs
@@ -256,8 +243,18 @@ public:
return Values == Other.Values && Blocks == Other.Blocks;
}
};
+} // namespace
-template <typename ModelledPHI> struct DenseMapInfo {
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const SinkingInstructionCandidate &C) {
+ OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+ << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+ return OS;
+}
+#endif
+
+template <> struct llvm::DenseMapInfo<ModelledPHI> {
static inline ModelledPHI &getEmptyKey() {
static ModelledPHI Dummy = ModelledPHI::createDummy(0);
return Dummy;
@@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo {
}
};
-using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
+using ModelledPHISet = DenseSet<ModelledPHI>;
+
+namespace {
//===----------------------------------------------------------------------===//
// ValueTable
@@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with sideeffects.
-class InstructionUseExpr : public GVNExpression::BasicExpression {
+class InstructionUseExpr : public BasicExpression {
unsigned MemoryUseOrder = -1;
bool Volatile = false;
ArrayRef<int> ShuffleMask;
@@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression {
public:
InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
BumpPtrAllocator &A)
- : GVNExpression::BasicExpression(I->getNumUses()) {
+ : BasicExpression(I->getNumUses()) {
allocateOperands(R, A);
setOpcode(I->getOpcode());
setType(I->getType());
@@ -315,8 +314,8 @@ public:
void setVolatile(bool V) { Volatile = V; }
hash_code getHashValue() const override {
- return hash_combine(GVNExpression::BasicExpression::getHashValue(),
- MemoryUseOrder, Volatile, ShuffleMask);
+ return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder,
+ Volatile, ShuffleMask);
}
template <typename Function> hash_code getHashValue(Function MapFn) {
@@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
class ValueTable {
DenseMap<Value *, uint32_t> ValueNumbering;
- DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+ DenseMap<Expression *, uint32_t> ExpressionNumbering;
DenseMap<size_t, uint32_t> HashNumbering;
BumpPtrAllocator Allocator;
ArrayRecycler<Value *> Recycler;
@@ -594,6 +593,7 @@ private:
}
}
};
+} // namespace
std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
@@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
NumRemoved += Insts.size() - 1;
}
-} // end anonymous namespace
-
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
GVNSink G;
if (!G.run(F))
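
The GVNSink change replaces a local class template that merely shadowed llvm::DenseMapInfo with an explicit specialization, which is what DenseSet's default template argument actually finds. A minimal sketch of the idiom for a hypothetical key type:

    #include "llvm/ADT/DenseMapInfo.h"
    #include "llvm/ADT/DenseSet.h"

    struct KeySketch {
      unsigned ID;
      bool operator==(const KeySketch &O) const { return ID == O.ID; }
    };

    // Explicit specialization in namespace llvm, as the patch now does
    // for ModelledPHI.
    template <> struct llvm::DenseMapInfo<KeySketch> {
      static inline KeySketch getEmptyKey() { return {~0u}; }
      static inline KeySketch getTombstoneKey() { return {~0u - 1}; }
      static unsigned getHashValue(const KeySketch &K) { return K.ID * 37u; }
      static bool isEqual(const KeySketch &L, const KeySketch &R) {
        return L.ID == R.ID;
      }
    };

    // DenseSet<KeySketch> now picks up the specialization implicitly.
    using KeySetSketch = llvm::DenseSet<KeySketch>;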
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index d2f09e9..a692009 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -551,12 +551,10 @@ public:
}
/// Support comparison with a single offset to allow binary searches.
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
- uint64_t RHSOffset) {
+ [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const Slice &RHS) {
+ [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
return LHSOffset < RHS.beginOffset();
}
@@ -2667,7 +2665,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
- V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
+ // No profiling support for vector selects.
+ V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
+ DEBUG_TYPE, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b18acea..4fe736a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
Phi.replaceAllUsesWith(RdxResult);
- continue;
}
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b96d29e..62a81ba 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// the vector loop or when not folding the tail. In the latter case, we know
// that the canonical induction increment will not overflow as the vector trip
// count is >= increment and a multiple of the increment.
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
if (!HasNUW) {
- auto *IVInc = Plan->getVectorLoopRegion()
- ->getExitingBasicBlock()
- ->getTerminator()
- ->getOperand(0);
- assert(match(IVInc, m_VPInstruction<Instruction::Add>(
- m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
+ auto *IVInc =
+ LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
+ assert(match(IVInc,
+ m_VPInstruction<Instruction::Add>(
+ m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
}
@@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
- VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
@@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
for (VPValue *Old : Old2New.keys())
Old->getDefiningRecipe()->eraseFromParent();
- assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
- !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
+ assert(isa<VPRegionBlock>(LoopRegion) &&
+ !LoopRegion->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
@@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
ResumePhi = ScalarPHBuilder.createScalarPhi(
- {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
- "vec.epilog.resume.val");
+ {VectorTC,
+ MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()},
+ {}, "vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
if (MainScalarPH->begin() == MainScalarPH->end())
@@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+ VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV();
// When vectorizing the epilogue loop, the canonical induction needs to be
// adjusted by the value after the main vector loop. Find the resume value
// created during execution of the main VPlan. It must be the first phi in the
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23f5623..0e0b042 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,8 @@ public:
// part if scalar. In the latter case, the recipe will be removed during
// unrolling.
ExtractLastElement,
+ // Extracts the last lane for each part from its operand.
+ ExtractLastLanePerPart,
// Extracts the second-to-last lane from its operand or the second-to-last
// part if it is scalar. In the latter case, the recipe will be removed
// during unrolling.
@@ -4058,6 +4060,19 @@ public:
/// Remove the current region from its VPlan, connecting its predecessor to
/// its entry, and its exiting block to its successor.
void dissolveToCFGLoop();
+
+ /// Returns the canonical induction recipe of the region.
+ VPCanonicalIVPHIRecipe *getCanonicalIV() {
+ VPBasicBlock *EntryVPBB = getEntryBasicBlock();
+ if (EntryVPBB->empty()) {
+ // VPlan native path. TODO: Unify both code paths.
+ EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
+ }
+ return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
+ }
+ const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
+ return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
+ }
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4252,12 +4267,14 @@ public:
BackedgeTakenCount = new VPValue();
return BackedgeTakenCount;
}
+ VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
/// Returns the VF of the vector loop region.
VPValue &getVF() { return VF; };
+ const VPValue &getVF() const { return VF; };
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
@@ -4369,16 +4386,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- /// Returns the canonical induction recipe of the vector loop.
- VPCanonicalIVPHIRecipe *getCanonicalIV() {
- VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
- if (EntryVPBB->empty()) {
- // VPlan native path.
- EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
- }
- return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
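
With getCanonicalIV() relocated from VPlan to VPRegionBlock above, every consumer in the remainder of this patch reaches the recipe through the vector loop region. A minimal sketch of the migrated call shape; the wrapper function is illustrative:

    // Illustrative only: fetch the canonical IV's scalar type via the
    // region, mirroring the call sites rewritten below.
    static Type *canonicalIVTypeSketch(VPlan &Plan) {
      VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
      return LoopRegion->getCanonicalIV()->getScalarType();
    }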
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a..f413c63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c0147ce..332791a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
}
VPIRMetadata VPBranchWeights;
- auto *Term = VPBuilder(CheckBlockVPBB)
- .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
- Plan.getCanonicalIV()->getDebugLoc());
+ auto *Term =
+ VPBuilder(CheckBlockVPBB)
+ .createNaryOp(
+ VPInstruction::BranchOnCond, {CondVPV},
+ Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
if (AddBranchWeights) {
MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
@@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
if (DerivedIV->getNumUsers() == 1 &&
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
- auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
- &Plan.getVectorTripCount());
+ auto *NewSel = Builder.createSelect(
+ AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount());
DerivedIV->moveAfter(&*Builder.getInsertPoint());
DerivedIV->setOperand(1, NewSel);
continue;
@@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
"FMaxNum/FMinNum reduction.\n");
return false;
}
- auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
+ auto *NewSel =
+ Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV);
ResumeR->setOperand(0, NewSel);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b42b049..ff286f7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -372,6 +372,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
+m_ExtractLastLanePerPart(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0);
+}
+
template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
@@ -394,6 +400,12 @@ m_AnyOf(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::AnyOf>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t>
+m_FirstActiveLane(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
+}
+
template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 0c27d53..fb17d5d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
// non-phi instructions.
auto &Plan = *HeaderVPBB->getPlan();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+ auto *IV =
+ new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV());
Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
Builder.insert(IV);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2368d18..775837f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
@@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
return ReducedPartRdx;
}
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement: {
- unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
+ unsigned Offset =
+ getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;
Value *Res;
if (State.VF.isVector()) {
assert(Offset <= State.VF.getKnownMinValue() &&
@@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractLastElement ||
+ getOpcode() == VPInstruction::ExtractLastLanePerPart ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::ExtractLane ||
@@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExtractLane:
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::FirstActiveLane:
@@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractLastElement:
O << "extract-last-element";
break;
+ case VPInstruction::ExtractLastLanePerPart:
+ O << "extract-last-lane-per-part";
+ break;
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
@@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return false;
auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
+ auto *CanIV = getParent()->getParent()->getCanonicalIV();
return StartC && StartC->isZero() && StepC && StepC->isOne() &&
getScalarType() == CanIV->getScalarType();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 40b7e8d..8d76b2d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
for (VPUser *U : CanonicalIV->users()) {
WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
@@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) {
if (!WidenNewIV)
return;
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
@@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
FPMathOperator *FPBinOp, Instruction *TruncI,
VPValue *StartV, VPValue *Step, DebugLoc DL,
VPBuilder &Builder) {
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
+ VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
@@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
ScalarEvolution &SE) {
VPValue *Incoming, *Mask;
if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
- m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Mask)),
- m_VPValue(Incoming))))
+ m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
return nullptr;
// Calculate the final index.
- VPValue *EndValue = Plan.getCanonicalIV();
- auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto *CanonicalIV = LoopRegion->getCanonicalIV();
+ Type *CanonicalIVType = CanonicalIV->getScalarType();
VPBuilder B(cast<VPBasicBlock>(PredVPBB));
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
@@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane);
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
FirstActiveLaneType, DL);
- EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
+ VPValue *EndValue =
+ B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
// `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
// changed it means the exit is using the incremented value, so we need to
@@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_ExtractLastElement(m_BuildVector()))) {
+ if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+ m_ExtractLastLanePerPart(m_BuildVector())))) {
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
- if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) {
+ if (match(Def,
+ m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
+ m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
Def->replaceAllUsesWith(A);
return;
}
- if (match(Def,
- m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) &&
+ if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
+ m_ExtractLastLanePerPart(m_VPValue(A)))) &&
((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
(isa<VPReplicateRecipe>(A) &&
cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
}
+
+ if (Plan->getUF() == 1 &&
+ match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
+ return Def->replaceAllUsesWith(
+ Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
+ }
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
Clone->insertBefore(RepOrWidenR);
- auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement,
- {Clone->getOperand(0)});
+ unsigned ExtractOpc =
+ vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
+ ? VPInstruction::ExtractLastElement
+ : VPInstruction::ExtractLastLanePerPart;
+ auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
Ext->insertBefore(Clone);
Clone->setOperand(0, Ext);
RepR->eraseFromParent();
@@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
return U->usesScalars(RepOrWidenR) ||
match(cast<VPRecipeBase>(U),
- m_ExtractLastElement(m_VPValue()));
+ m_CombineOr(m_ExtractLastElement(m_VPValue()),
+ m_ExtractLastLanePerPart(m_VPValue())));
}))
continue;
@@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
});
- auto *CanIV = Plan.getCanonicalIV();
+ auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV();
if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ,
m_Specific(CanIV->getBackedgeValue()),
m_Specific(&Plan.getVectorTripCount()))))
@@ -2319,7 +2334,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
- auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ auto *CanonicalIVPHI = TopRegion->getCanonicalIV();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
@@ -2358,7 +2373,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// Create the active lane mask instruction in the VPlan preheader.
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1));
auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
@@ -2394,13 +2409,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
SmallVector<VPValue *> WideCanonicalIVs;
- auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
- IsaPred<VPWidenCanonicalIVRecipe>);
- assert(count_if(Plan.getCanonicalIV()->users(),
+ auto *FoundWidenCanonicalIVUser = find_if(
+ LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
+ assert(count_if(LoopRegion->getCanonicalIV()->users(),
IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
"Must have at most one VPWideCanonicalIVRecipe");
- if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
+ if (FoundWidenCanonicalIVUser !=
+ LoopRegion->getCanonicalIV()->users().end()) {
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
WideCanonicalIVs.push_back(WideCanonicalIV);
@@ -2408,7 +2425,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
// Also include VPWidenIntOrFpInductionRecipes that represent a widened
// version of the canonical induction.
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
@@ -2441,8 +2458,9 @@ void VPlanTransforms::addActiveLaneMask(
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
- auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
- IsaPred<VPWidenCanonicalIVRecipe>);
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto *FoundWidenCanonicalIVUser = find_if(
+ LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
@@ -2455,7 +2473,7 @@ void VPlanTransforms::addActiveLaneMask(
} else {
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1));
LaneMask =
B.createNaryOp(VPInstruction::ActiveLaneMask,
{WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2565,9 +2583,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
});
assert(all_of(Plan.getVFxUF().users(),
- [&Plan](VPUser *U) {
- return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
- m_Specific(&Plan.getVFxUF()))) ||
+ [&LoopRegion, &Plan](VPUser *U) {
+ return match(U,
+ m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
+ m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
}) &&
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
@@ -2722,9 +2741,10 @@ void VPlanTransforms::addExplicitVectorLength(
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
if (Plan.hasScalarVFOnly())
return;
- VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
- auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
auto *CanIVTy = CanonicalIVPHI->getScalarType();
VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -4164,7 +4184,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// Adjust induction to reflect that the transformed plan only processes one
// original iteration.
- auto *CanIV = Plan.getCanonicalIV();
+ auto *CanIV = VectorLoop->getCanonicalIV();
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
VPBuilder PHBuilder(Plan.getVectorPreheader());
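
Several simplifyRecipe hunks above pair the old and new extract opcodes with m_CombineOr so a single match accepts either. A minimal sketch of that disjunction; the wrapper is illustrative:

    // Illustrative only: true if Def extracts the last element or the
    // last lane per part of some value, which is bound to A.
    static bool matchAnyLastExtractSketch(VPSingleDefRecipe *Def, VPValue *&A) {
      using namespace llvm::VPlanPatternMatch;
      return match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
                                    m_ExtractLastLanePerPart(m_VPValue(A))));
    }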
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 1c4adfc..5aeda3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -69,7 +69,8 @@ class UnrollState {
VPBasicBlock::iterator InsertPtForPhi);
VPValue *getConstantVPV(unsigned Part) {
- Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+ Type *CanIVIntTy =
+ Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
}
@@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Compute*Result which combine all parts to compute the final value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
- match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Op1))) ||
+ match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 66748c5..8b1b0e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
return Expanded;
}
-bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
+bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (isa<VPActiveLaneMaskPHIRecipe>(V))
return true;
@@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
return B == Plan.getTripCount() &&
- (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(),
- m_Specific(&Plan.getVF()))) ||
+ (match(A,
+ m_ScalarIVSteps(
+ m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+ m_One(), m_Specific(&Plan.getVF()))) ||
IsWideCanonicalIV(A));
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
- B == Plan.getOrCreateBackedgeTakenCount();
+ B == Plan.getBackedgeTakenCount();
}
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
@@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
return all_of(R->operands(), isUniformAcrossVFsAndUFs);
}
- auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
+ auto *CanonicalIV =
+ R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
// Canonical IV chain is uniform.
if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0a..cf95ac0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
}
/// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(const VPValue *V, VPlan &Plan);
+bool isHeaderMask(const VPValue *V, const VPlan &Plan);
/// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
/// as such if it is either loop invariant (defined outside the vector region)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5262af6..91734a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
}
- if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
- if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength &&
- !verifyEVLRecipe(*EVL)) {
- errs() << "EVL VPValue is not used correctly\n";
- return false;
+ if (const auto *VPI = dyn_cast<VPInstruction>(&R)) {
+ switch (VPI->getOpcode()) {
+ case VPInstruction::ExplicitVectorLength:
+ if (!verifyEVLRecipe(*VPI)) {
+ errs() << "EVL VPValue is not used correctly\n";
+ return false;
+ }
+ break;
+ default:
+ break;
}
}
}