Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp | 32
-rw-r--r--  llvm/lib/Analysis/IR2Vec.cpp | 180
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 10
-rw-r--r--  llvm/lib/Analysis/TargetLibraryInfo.cpp | 5
-rw-r--r--  llvm/lib/AsmParser/LLLexer.cpp | 1
-rw-r--r--  llvm/lib/AsmParser/LLParser.cpp | 49
-rw-r--r--  llvm/lib/BinaryFormat/Dwarf.cpp | 2
-rw-r--r--  llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 16
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/RegisterCoalescer.cpp | 6
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 3
-rw-r--r--  llvm/lib/IR/AsmWriter.cpp | 14
-rw-r--r--  llvm/lib/IR/ConstantFPRange.cpp | 20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Combine.td | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 81
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 91
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 34
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 16
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 28
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 32
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatterns.td | 21
-rw-r--r--  llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2
-rw-r--r--  llvm/lib/Target/Mips/MipsInstrInfo.td | 16
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 23
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrPredicates.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 142
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td | 16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 2
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 8
-rw-r--r--  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 8
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 24
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 5
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 1
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 37
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 192
-rw-r--r--  llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Scalar/GVN.cpp | 218
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp | 9
-rw-r--r--  llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 16
-rw-r--r--  llvm/lib/Transforms/Utils/LowerInvoke.cpp | 26
-rw-r--r--  llvm/lib/Transforms/Utils/MisExpect.cpp | 61
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 10
62 files changed, 880 insertions, 702 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 1f0da8d1..8d20b0e 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -275,7 +275,7 @@ bool Dependence::isAnti() const {
// if no subscript in the source or destination mention the induction
// variable associated with the loop at this level.
// Leave this out of line, so it will serve as a virtual method anchor
-bool Dependence::isScalar(unsigned level, bool isSameSD) const { return false; }
+bool Dependence::isScalar(unsigned level, bool IsSameSD) const { return false; }
//===----------------------------------------------------------------------===//
// FullDependence methods
@@ -351,38 +351,38 @@ bool FullDependence::normalize(ScalarEvolution *SE) {
// getDirection - Returns the direction associated with a particular common or
// SameSD level.
-unsigned FullDependence::getDirection(unsigned Level, bool isSameSD) const {
- return getDVEntry(Level, isSameSD).Direction;
+unsigned FullDependence::getDirection(unsigned Level, bool IsSameSD) const {
+ return getDVEntry(Level, IsSameSD).Direction;
}
// Returns the distance (or NULL) associated with a particular common or
// SameSD level.
-const SCEV *FullDependence::getDistance(unsigned Level, bool isSameSD) const {
- return getDVEntry(Level, isSameSD).Distance;
+const SCEV *FullDependence::getDistance(unsigned Level, bool IsSameSD) const {
+ return getDVEntry(Level, IsSameSD).Distance;
}
// Returns true if a particular regular or SameSD level is scalar; that is,
// if no subscript in the source or destination mention the induction variable
// associated with the loop at this level.
-bool FullDependence::isScalar(unsigned Level, bool isSameSD) const {
- return getDVEntry(Level, isSameSD).Scalar;
+bool FullDependence::isScalar(unsigned Level, bool IsSameSD) const {
+ return getDVEntry(Level, IsSameSD).Scalar;
}
// Returns true if peeling the first iteration from this regular or SameSD
// loop level will break this dependence.
-bool FullDependence::isPeelFirst(unsigned Level, bool isSameSD) const {
- return getDVEntry(Level, isSameSD).PeelFirst;
+bool FullDependence::isPeelFirst(unsigned Level, bool IsSameSD) const {
+ return getDVEntry(Level, IsSameSD).PeelFirst;
}
// Returns true if peeling the last iteration from this regular or SameSD
// loop level will break this dependence.
-bool FullDependence::isPeelLast(unsigned Level, bool isSameSD) const {
- return getDVEntry(Level, isSameSD).PeelLast;
+bool FullDependence::isPeelLast(unsigned Level, bool IsSameSD) const {
+ return getDVEntry(Level, IsSameSD).PeelLast;
}
// Returns true if splitting loop will break the dependence.
-bool FullDependence::isSplitable(unsigned Level, bool isSameSD) const {
- return getDVEntry(Level, isSameSD).Splitable;
+bool FullDependence::isSplitable(unsigned Level, bool IsSameSD) const {
+ return getDVEntry(Level, IsSameSD).Splitable;
}
// inSameSDLoops - Returns true if this level is an SameSD level, i.e.,
@@ -691,7 +691,7 @@ void Dependence::dump(raw_ostream &OS) const {
dumpImp(OS);
unsigned SameSDLevels = getSameSDLevels();
if (SameSDLevels > 0) {
- OS << "! / assuming " << SameSDLevels << " loop level(s) fused: ";
+ OS << " / assuming " << SameSDLevels << " loop level(s) fused: ";
dumpImp(OS, true);
}
}
@@ -706,13 +706,13 @@ void Dependence::dump(raw_ostream &OS) const {
// For debugging purposes. Dumps a dependence to OS with or without considering
// the SameSD levels.
-void Dependence::dumpImp(raw_ostream &OS, bool isSameSD) const {
+void Dependence::dumpImp(raw_ostream &OS, bool IsSameSD) const {
bool Splitable = false;
unsigned Levels = getLevels();
unsigned SameSDLevels = getSameSDLevels();
bool OnSameSD = false;
unsigned LevelNum = Levels;
- if (isSameSD)
+ if (IsSameSD)
LevelNum += SameSDLevels;
OS << " [";
for (unsigned II = 1; II <= LevelNum; ++II) {
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 1794a60..85b5372 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -153,11 +153,6 @@ void Embedding::print(raw_ostream &OS) const {
// Embedder and its subclasses
//===----------------------------------------------------------------------===//
-Embedder::Embedder(const Function &F, const Vocabulary &Vocab)
- : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()),
- OpcWeight(::OpcWeight), TypeWeight(::TypeWeight), ArgWeight(::ArgWeight),
- FuncVector(Embedding(Dimension)) {}
-
std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F,
const Vocabulary &Vocab) {
switch (Mode) {
@@ -169,110 +164,85 @@ std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F,
return nullptr;
}
-const InstEmbeddingsMap &Embedder::getInstVecMap() const {
- if (InstVecMap.empty())
- computeEmbeddings();
- return InstVecMap;
-}
-
-const BBEmbeddingsMap &Embedder::getBBVecMap() const {
- if (BBVecMap.empty())
- computeEmbeddings();
- return BBVecMap;
-}
-
-const Embedding &Embedder::getBBVector(const BasicBlock &BB) const {
- auto It = BBVecMap.find(&BB);
- if (It != BBVecMap.end())
- return It->second;
- computeEmbeddings(BB);
- return BBVecMap[&BB];
-}
+Embedding Embedder::computeEmbeddings() const {
+ Embedding FuncVector(Dimension, 0.0);
-const Embedding &Embedder::getFunctionVector() const {
- // Currently, we always (re)compute the embeddings for the function.
- // This is cheaper than caching the vector.
- computeEmbeddings();
- return FuncVector;
-}
-
-void Embedder::computeEmbeddings() const {
if (F.isDeclaration())
- return;
-
- FuncVector = Embedding(Dimension, 0.0);
+ return FuncVector;
// Consider only the basic blocks that are reachable from entry
- for (const BasicBlock *BB : depth_first(&F)) {
- computeEmbeddings(*BB);
- FuncVector += BBVecMap[BB];
- }
+ for (const BasicBlock *BB : depth_first(&F))
+ FuncVector += computeEmbeddings(*BB);
+ return FuncVector;
}
-void SymbolicEmbedder::computeEmbeddings(const BasicBlock &BB) const {
+Embedding Embedder::computeEmbeddings(const BasicBlock &BB) const {
Embedding BBVector(Dimension, 0);
// We consider only the non-debug and non-pseudo instructions
- for (const auto &I : BB.instructionsWithoutDebug()) {
- Embedding ArgEmb(Dimension, 0);
- for (const auto &Op : I.operands())
- ArgEmb += Vocab[*Op];
- auto InstVector =
- Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
- if (const auto *IC = dyn_cast<CmpInst>(&I))
- InstVector += Vocab[IC->getPredicate()];
- InstVecMap[&I] = InstVector;
- BBVector += InstVector;
- }
- BBVecMap[&BB] = BBVector;
-}
-
-void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const {
- Embedding BBVector(Dimension, 0);
+ for (const auto &I : BB.instructionsWithoutDebug())
+ BBVector += computeEmbeddings(I);
+ return BBVector;
+}
+
+Embedding SymbolicEmbedder::computeEmbeddings(const Instruction &I) const {
+  // Currently, we always (re)compute the embeddings for the symbolic embedder.
+ // This is cheaper than caching the vectors.
+ Embedding ArgEmb(Dimension, 0);
+ for (const auto &Op : I.operands())
+ ArgEmb += Vocab[*Op];
+ auto InstVector =
+ Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
+ if (const auto *IC = dyn_cast<CmpInst>(&I))
+ InstVector += Vocab[IC->getPredicate()];
+ return InstVector;
+}
+
+Embedding FlowAwareEmbedder::computeEmbeddings(const Instruction &I) const {
+ // If we have already computed the embedding for this instruction, return it
+ auto It = InstVecMap.find(&I);
+ if (It != InstVecMap.end())
+ return It->second;
- // We consider only the non-debug and non-pseudo instructions
- for (const auto &I : BB.instructionsWithoutDebug()) {
- // TODO: Handle call instructions differently.
- // For now, we treat them like other instructions
- Embedding ArgEmb(Dimension, 0);
- for (const auto &Op : I.operands()) {
- // If the operand is defined elsewhere, we use its embedding
- if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
- auto DefIt = InstVecMap.find(DefInst);
- // Fixme (#159171): Ideally we should never miss an instruction
- // embedding here.
- // But when we have cyclic dependencies (e.g., phi
- // nodes), we might miss the embedding. In such cases, we fall back to
- // using the vocabulary embedding. This can be fixed by iterating to a
- // fixed-point, or by using a simple solver for the set of simultaneous
- // equations.
- // Another case when we might miss an instruction embedding is when
- // the operand instruction is in a different basic block that has not
- // been processed yet. This can be fixed by processing the basic blocks
- // in a topological order.
- if (DefIt != InstVecMap.end())
- ArgEmb += DefIt->second;
- else
- ArgEmb += Vocab[*Op];
- }
- // If the operand is not defined by an instruction, we use the vocabulary
- else {
- LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: "
- << *Op << "=" << Vocab[*Op][0] << "\n");
+ // TODO: Handle call instructions differently.
+ // For now, we treat them like other instructions
+ Embedding ArgEmb(Dimension, 0);
+ for (const auto &Op : I.operands()) {
+ // If the operand is defined elsewhere, we use its embedding
+ if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
+ auto DefIt = InstVecMap.find(DefInst);
+ // Fixme (#159171): Ideally we should never miss an instruction
+ // embedding here.
+ // But when we have cyclic dependencies (e.g., phi
+ // nodes), we might miss the embedding. In such cases, we fall back to
+ // using the vocabulary embedding. This can be fixed by iterating to a
+ // fixed-point, or by using a simple solver for the set of simultaneous
+ // equations.
+ // Another case when we might miss an instruction embedding is when
+ // the operand instruction is in a different basic block that has not
+ // been processed yet. This can be fixed by processing the basic blocks
+ // in a topological order.
+ if (DefIt != InstVecMap.end())
+ ArgEmb += DefIt->second;
+ else
ArgEmb += Vocab[*Op];
- }
}
- // Create the instruction vector by combining opcode, type, and arguments
- // embeddings
- auto InstVector =
- Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
- // Add compare predicate embedding as an additional operand if applicable
- if (const auto *IC = dyn_cast<CmpInst>(&I))
- InstVector += Vocab[IC->getPredicate()];
- InstVecMap[&I] = InstVector;
- BBVector += InstVector;
+ // If the operand is not defined by an instruction, we use the
+ // vocabulary
+ else {
+ LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: "
+ << *Op << "=" << Vocab[*Op][0] << "\n");
+ ArgEmb += Vocab[*Op];
+ }
}
- BBVecMap[&BB] = BBVector;
+ // Create the instruction vector by combining opcode, type, and arguments
+ // embeddings
+ auto InstVector =
+ Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
+ if (const auto *IC = dyn_cast<CmpInst>(&I))
+ InstVector += Vocab[IC->getPredicate()];
+ InstVecMap[&I] = InstVector;
+ return InstVector;
}
// ==----------------------------------------------------------------------===//
@@ -695,25 +665,17 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M,
Emb->getFunctionVector().print(OS);
OS << "Basic block vectors:\n";
- const auto &BBMap = Emb->getBBVecMap();
for (const BasicBlock &BB : F) {
- auto It = BBMap.find(&BB);
- if (It != BBMap.end()) {
- OS << "Basic block: " << BB.getName() << ":\n";
- It->second.print(OS);
- }
+ OS << "Basic block: " << BB.getName() << ":\n";
+ Emb->getBBVector(BB).print(OS);
}
OS << "Instruction vectors:\n";
- const auto &InstMap = Emb->getInstVecMap();
for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
- auto It = InstMap.find(&I);
- if (It != InstMap.end()) {
- OS << "Instruction: ";
- I.print(OS);
- It->second.print(OS);
- }
+ OS << "Instruction: ";
+ I.print(OS);
+ Emb->getInstVector(I).print(OS);
}
}
}
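
For orientation, each instruction embedding above is an element-wise sum of the opcode, type, and operand vectors (plus the predicate vector for compares). A minimal standalone sketch of that combination, using plain std::vector<double> in place of the real Embedding and Vocabulary classes (all names here are hypothetical):

    #include <cstddef>
    #include <vector>

    using Vec = std::vector<double>;

    // Element-wise sum, standing in for Embedding::operator+=.
    static void addInto(Vec &Dst, const Vec &Src) {
      for (std::size_t I = 0; I < Dst.size(); ++I)
        Dst[I] += Src[I];
    }

    // InstVector = opcode embedding + type embedding + sum of operand embeddings.
    static Vec combine(const Vec &Opc, const Vec &Ty,
                       const std::vector<Vec> &Ops) {
      Vec Inst(Opc.size(), 0.0);
      addInto(Inst, Opc);
      addInto(Inst, Ty);
      for (const Vec &Op : Ops)
        addInto(Inst, Op);
      return Inst;
    }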
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index d1977f0..4e38626 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -671,12 +671,12 @@ Value *llvm::simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
/// This is very similar to stripAndAccumulateConstantOffsets(), except it
/// normalizes the offset bitwidth to the stripped pointer type, not the
/// original pointer type.
-static APInt stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
- bool AllowNonInbounds = false) {
+static APInt stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V) {
assert(V->getType()->isPtrOrPtrVectorTy());
APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType()));
- V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
+ V = V->stripAndAccumulateConstantOffsets(DL, Offset,
+ /*AllowNonInbounds=*/true);
// As that strip may trace through `addrspacecast`, need to sext or trunc
// the offset calculated.
return Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(V->getType()));
@@ -853,10 +853,12 @@ static Value *simplifySubInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
return W;
// Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
- if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y))))
+ if (match(Op0, m_PtrToIntOrAddr(m_Value(X))) &&
+ match(Op1, m_PtrToIntOrAddr(m_Value(Y)))) {
if (Constant *Result = computePointerDifference(Q.DL, X, Y))
return ConstantFoldIntegerCast(Result, Op0->getType(), /*IsSigned*/ true,
Q.DL);
+ }
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 6e92766..813632c 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -740,11 +740,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailable(LibFunc_fgets_unlocked);
}
- if (T.isAndroid() && T.isAndroidVersionLT(21)) {
- TLI.setUnavailable(LibFunc_stpcpy);
- TLI.setUnavailable(LibFunc_stpncpy);
- }
-
if (T.isPS()) {
// PS4/PS5 do have memalign.
TLI.setAvailable(LibFunc_memalign);
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index f6937d3..50d1d47 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -982,6 +982,7 @@ lltok::Kind LLLexer::LexIdentifier() {
DWKEYWORD(ATE, DwarfAttEncoding);
DWKEYWORD(VIRTUALITY, DwarfVirtuality);
DWKEYWORD(LANG, DwarfLang);
+ DWKEYWORD(LNAME, DwarfSourceLangName);
DWKEYWORD(CC, DwarfCC);
DWKEYWORD(OP, DwarfOp);
DWKEYWORD(MACINFO, DwarfMacinfo);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 5589966..380b192 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4740,6 +4740,10 @@ struct DwarfLangField : public MDUnsignedField {
DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
};
+struct DwarfSourceLangNameField : public MDUnsignedField {
+ DwarfSourceLangNameField() : MDUnsignedField(0, UINT32_MAX) {}
+};
+
struct DwarfCCField : public MDUnsignedField {
DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {}
};
@@ -4998,6 +5002,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfLangField &Result) {
}
template <>
+bool LLParser::parseMDField(LocTy Loc, StringRef Name,
+ DwarfSourceLangNameField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::DwarfSourceLangName)
+ return tokError("expected DWARF source language name");
+
+ unsigned Lang = dwarf::getSourceLanguageName(Lex.getStrVal());
+ if (!Lang)
+ return tokError("invalid DWARF source language name" + Twine(" '") +
+ Lex.getStrVal() + "'");
+ assert(Lang <= Result.Max && "Expected valid DWARF source language name");
+ Result.assign(Lang);
+ Lex.Lex();
+ return false;
+}
+
+template <>
bool LLParser::parseMDField(LocTy Loc, StringRef Name, DwarfCCField &Result) {
if (Lex.getKind() == lltok::APSInt)
return parseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
@@ -5836,9 +5859,12 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
if (!IsDistinct)
return tokError("missing 'distinct', required for !DICompileUnit");
+ LocTy Loc = Lex.getLoc();
+
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(language, DwarfLangField, ); \
REQUIRED(file, MDField, (/* AllowNull */ false)); \
+ OPTIONAL(language, DwarfLangField, ); \
+ OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, ); \
OPTIONAL(producer, MDStringField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(flags, MDStringField, ); \
@@ -5860,12 +5886,23 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
+ if (!language.Seen && !sourceLanguageName.Seen)
+ return error(Loc, "missing one of 'language' or 'sourceLanguageName', "
+ "required for !DICompileUnit");
+
+ if (language.Seen && sourceLanguageName.Seen)
+ return error(Loc, "can only specify one of 'language' and "
+ "'sourceLanguageName' on !DICompileUnit");
+
Result = DICompileUnit::getDistinct(
- Context, DISourceLanguageName(language.Val), file.Val, producer.Val,
- isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val,
- emissionKind.Val, enums.Val, retainedTypes.Val, globals.Val, imports.Val,
- macros.Val, dwoId.Val, splitDebugInlining.Val, debugInfoForProfiling.Val,
- nameTableKind.Val, rangesBaseAddress.Val, sysroot.Val, sdk.Val);
+ Context,
+ language.Seen ? DISourceLanguageName(language.Val)
+ : DISourceLanguageName(sourceLanguageName.Val, 0),
+ file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
+ splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
+ globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
+ debugInfoForProfiling.Val, nameTableKind.Val, rangesBaseAddress.Val,
+ sysroot.Val, sdk.Val);
return false;
}
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 969047a..55fa2df 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -893,6 +893,8 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
return DefaultedMemberString(Val);
case DW_AT_APPLE_enum_kind:
return EnumKindString(Val);
+ case DW_AT_language_name:
+ return SourceLanguageNameString(static_cast<SourceLanguageName>(Val));
}
return StringRef();
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index a4d1b83..cdcf7a8 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1867,12 +1867,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// distinct. It's always distinct.
IsDistinct = true;
+ const auto LangVersionMask = (uint64_t(1) << 63);
+ const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+
auto *CU = DICompileUnit::getDistinct(
- Context, DISourceLanguageName(Record[1]), getMDOrNull(Record[2]),
- getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6],
- getMDString(Record[7]), Record[8], getMDOrNull(Record[9]),
- getMDOrNull(Record[10]), getMDOrNull(Record[12]),
- getMDOrNull(Record[13]),
+ Context,
+ HasVersionedLanguage
+ ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+ : DISourceLanguageName(Record[1]),
+ getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
+ getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ getMDOrNull(Record[12]), getMDOrNull(Record[13]),
Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
Record.size() <= 14 ? 0 : Record[14],
Record.size() <= 16 ? true : Record[16],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7ed140d..0ca55a26 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2108,7 +2108,13 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
assert(N->isDistinct() && "Expected distinct compile units");
Record.push_back(/* IsDistinct */ true);
- Record.push_back(N->getSourceLanguage().getUnversionedName());
+ auto Lang = N->getSourceLanguage();
+ Record.push_back(Lang.getName());
+  // Set bit so the MetadataLoader can distinguish between versioned and
+ // unversioned names.
+ if (Lang.hasVersionedName())
+ Record.back() ^= (uint64_t(1) << 63);
+
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
Record.push_back(N->isOptimized());
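
The writer tags a versioned language name by setting bit 63 of the record value; the MetadataLoader change above tests and strips that bit on read. A self-contained sketch of the round trip (plain C++, independent of the bitcode classes):

    #include <cassert>
    #include <cstdint>

    const uint64_t LangVersionMask = uint64_t(1) << 63;

    uint64_t encodeLang(uint64_t Name, bool Versioned) {
      assert(!(Name & LangVersionMask) && "name must fit in 63 bits");
      return Versioned ? (Name | LangVersionMask) : Name;
    }

    // Mirrors the reader: test the tag bit, then mask it off.
    uint64_t decodeLang(uint64_t Record, bool &Versioned) {
      Versioned = (Record & LangVersionMask) != 0;
      return Record & ~LangVersionMask;
    }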
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index bc0bb34..f0f0861 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -587,10 +587,12 @@ bool DwarfExpression::addExpression(
emitUnsigned(LeftShift);
emitOp(dwarf::DW_OP_shl);
}
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(RightShift);
- emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext ? dwarf::DW_OP_shra
- : dwarf::DW_OP_shr);
+ if (RightShift) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(RightShift);
+ emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext ? dwarf::DW_OP_shra
+ : dwarf::DW_OP_shr);
+ }
// The value is now at the top of the stack, so set the location to
// implicit so that we get a stack_value at the end.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index eb73d01b..4320b1d 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -3194,7 +3194,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
case ScaledRegField:
return ScaledReg;
case BaseOffsField:
- return ConstantInt::get(IntPtrTy, BaseOffs);
+ return ConstantInt::getSigned(IntPtrTy, BaseOffs);
}
}
@@ -6100,7 +6100,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Add in the Base Offset if present.
if (AddrMode.BaseOffs) {
- Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
if (ResultIndex) {
// We need to add this separately from the scale above to help with
// SDAG consecutive load/store merging.
@@ -6226,7 +6226,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Add in the Base Offset if present.
if (AddrMode.BaseOffs) {
- Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
if (Result)
Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
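
The switch to ConstantInt::getSigned matters because BaseOffs is a signed 64-bit offset: a negative value has to be interpreted as signed when converted into the pointer-width type, whereas plain get() treats the raw bits as unsigned. A small sketch of the distinction (plain C++, not the LLVM API):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t BaseOffs = -8;
      // Unsigned interpretation zero-extends the bit pattern...
      uint64_t AsUnsigned = (uint32_t)BaseOffs; // 0x00000000fffffff8
      // ...while address arithmetic needs the sign-extended value.
      int64_t AsSigned = BaseOffs;              // 0xfffffffffffffff8 == -8
      printf("%llx vs %llx\n", (unsigned long long)AsUnsigned,
             (unsigned long long)AsSigned);
      return 0;
    }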
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 906d62a3..b425b95 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
Result.clearSign();
return Result;
}
+ case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
bool Unused;
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 3f6813e..90c60d4 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -344,6 +344,22 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = KnownBits::mul(Known, Known2);
break;
}
+ case TargetOpcode::G_UMULH: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::mulhu(Known, Known2);
+ break;
+ }
+ case TargetOpcode::G_SMULH: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::mulhs(Known, Known2);
+ break;
+ }
case TargetOpcode::G_SELECT: {
computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
Known, DemandedElts, Depth + 1);
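
The new G_UMULH/G_SMULH cases reuse the generic KnownBits transfer functions. As a sketch of what that buys (assuming llvm/Support/KnownBits.h): if both 32-bit operands are known to fit in 16 bits, the full product fits in 32 bits, so every bit of the unsigned high half should come back known zero:

    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    KnownBits highHalfOfSmallProduct() {
      KnownBits LHS(32), RHS(32);
      LHS.Zero.setHighBits(16); // operand known < 2^16
      RHS.Zero.setHighBits(16); // operand known < 2^16
      // High 32 bits of the 64-bit product: expected all-zero.
      return KnownBits::mulhu(LHS, RHS);
    }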
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ebfea8e..e17a214 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -2051,6 +2051,12 @@ bool RegisterCoalescer::joinCopy(
}
if (CP.getNewRC()) {
+ if (RegClassInfo.getNumAllocatableRegs(CP.getNewRC()) == 0) {
+ LLVM_DEBUG(dbgs() << "\tNo " << TRI->getRegClassName(CP.getNewRC())
+ << "are available for allocation\n");
+ return false;
+ }
+
auto SrcRC = MRI->getRegClass(CP.getSrcReg());
auto DstRC = MRI->getRegClass(CP.getDstReg());
unsigned SrcIdx = CP.getSrcIdx();
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index edc69a3..212a0c0 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -149,7 +149,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
if (!Name.empty())
WithColor(OS, Color) << Name;
else if (Attr == DW_AT_decl_line || Attr == DW_AT_decl_column ||
- Attr == DW_AT_call_line || Attr == DW_AT_call_column) {
+ Attr == DW_AT_call_line || Attr == DW_AT_call_column ||
+ Attr == DW_AT_language_version) {
if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
OS << *Val;
else
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd..0bc877d 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2370,10 +2370,16 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Out << "!DICompileUnit(";
MDFieldPrinter Printer(Out, WriterCtx);
- Printer.printDwarfEnum("language",
- N->getSourceLanguage().getUnversionedName(),
- dwarf::LanguageString,
- /* ShouldSkipZero */ false);
+ auto Lang = N->getSourceLanguage();
+ if (Lang.hasVersionedName())
+ Printer.printDwarfEnum(
+ "sourceLanguageName",
+ static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+ dwarf::SourceLanguageNameString,
+ /* ShouldSkipZero */ false);
+ else
+ Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
+ /* ShouldSkipZero */ false);
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printString("producer", N->getProducer());
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
index 7509188..fba6942 100644
--- a/llvm/lib/IR/ConstantFPRange.cpp
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -391,3 +391,23 @@ ConstantFPRange ConstantFPRange::unionWith(const ConstantFPRange &CR) const {
return ConstantFPRange(minnum(Lower, CR.Lower), maxnum(Upper, CR.Upper),
MayBeQNaN | CR.MayBeQNaN, MayBeSNaN | CR.MayBeSNaN);
}
+
+ConstantFPRange ConstantFPRange::abs() const {
+ if (isNaNOnly())
+ return *this;
+ // Check if the range is all non-negative or all non-positive.
+ if (Lower.isNegative() == Upper.isNegative()) {
+ if (Lower.isNegative())
+ return negate();
+ return *this;
+ }
+ // The range contains both positive and negative values.
+ APFloat NewLower = APFloat::getZero(getSemantics());
+ APFloat NewUpper = maxnum(-Lower, Upper);
+ return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN,
+ MayBeSNaN);
+}
+
+ConstantFPRange ConstantFPRange::negate() const {
+ return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN);
+}
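
The case split in abs() is the usual interval rule: a range on one side of zero is kept (negated if negative), and a range straddling zero runs from zero up to the larger endpoint magnitude. A scalar sketch over doubles (NaN tracking and signed-zero subtleties ignored; names hypothetical):

    #include <algorithm>

    struct Interval { double Lo, Hi; };

    Interval absInterval(Interval R) {
      if ((R.Lo < 0) == (R.Hi < 0))                 // entirely one sign
        return R.Lo < 0 ? Interval{-R.Hi, -R.Lo} : R;
      return {0.0, std::max(-R.Lo, R.Hi)};          // straddles zero
    }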
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 639ddcb..ecaeff7 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -350,7 +350,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, cast_of_cast_combines,
+ [copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
buildvector_of_truncate, integer_of_truncate,
mutate_anyext_to_zext, combines_for_extload,
combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 31b3d18..7294f3e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16249,7 +16249,9 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
bool Negated;
uint64_t SplatVal;
- if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
+ // NOTE: SRAD cannot be used to represent sdiv-by-one.
+ if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated) &&
+ SplatVal > 1) {
SDValue Pg = getPredicateForScalableVector(DAG, DL, VT);
SDValue Res =
DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
@@ -30034,7 +30036,9 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
bool Negated;
uint64_t SplatVal;
- if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
+ // NOTE: SRAD cannot be used to represent sdiv-by-one.
+ if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated) &&
+ SplatVal > 1) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32);
@@ -30606,6 +30610,43 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
+ if (Op->getNumOperands() == 3) {
+ // aarch64_sve_ld3 only supports packed datatypes.
+ EVT PackedVT = getPackedSVEVectorVT(OpVT.getVectorElementCount());
+ Align Alignment = DAG.getReducedAlign(PackedVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(PackedVT.getStoreSize() * 3, Alignment);
+
+ // Write out unmodified operands.
+ SmallVector<SDValue, 3> Chains;
+ for (unsigned I = 0; I < 3; ++I) {
+ SDValue Ptr =
+ DAG.getMemBasePlusOffset(StackPtr, PackedVT.getStoreSize() * I, DL);
+ SDValue V = getSVESafeBitCast(PackedVT, Op.getOperand(I), DAG);
+ Chains.push_back(
+ DAG.getStore(DAG.getEntryNode(), DL, V, Ptr, MachinePointerInfo()));
+ }
+
+ Intrinsic::ID IntID = Intrinsic::aarch64_sve_ld3_sret;
+ EVT PredVT = PackedVT.changeVectorElementType(MVT::i1);
+
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains));
+ Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
+ Ops.push_back(DAG.getConstant(1, DL, PredVT));
+ Ops.push_back(StackPtr);
+
+ // Read back and deinterleave data.
+ SDVTList VTs = DAG.getVTList(PackedVT, PackedVT, PackedVT, MVT::Other);
+ SDValue LD3 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
+
+ SmallVector<SDValue, 3> Results;
+ Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(0), DAG));
+ Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(1), DAG));
+ Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(2), DAG));
+ return DAG.getMergeValues(Results, DL);
+ }
+
// Are multi-register uzp instructions available?
if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
OpVT.getVectorElementType() != MVT::i1) {
@@ -30647,6 +30688,42 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");
+ if (Op->getNumOperands() == 3) {
+ // aarch64_sve_st3 only supports packed datatypes.
+ EVT PackedVT = getPackedSVEVectorVT(OpVT.getVectorElementCount());
+ SmallVector<SDValue, 3> InVecs;
+ for (SDValue V : Op->ops())
+ InVecs.push_back(getSVESafeBitCast(PackedVT, V, DAG));
+
+ Align Alignment = DAG.getReducedAlign(PackedVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(PackedVT.getStoreSize() * 3, Alignment);
+
+ Intrinsic::ID IntID = Intrinsic::aarch64_sve_st3;
+ EVT PredVT = PackedVT.changeVectorElementType(MVT::i1);
+
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
+ Ops.append(InVecs);
+ Ops.push_back(DAG.getConstant(1, DL, PredVT));
+ Ops.push_back(StackPtr);
+
+ // Interleave operands and store.
+ SDValue Chain = DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops);
+
+ // Read back the interleaved data.
+ SmallVector<SDValue, 3> Results;
+ for (unsigned I = 0; I < 3; ++I) {
+ SDValue Ptr =
+ DAG.getMemBasePlusOffset(StackPtr, PackedVT.getStoreSize() * I, DL);
+ SDValue L = DAG.getLoad(PackedVT, DL, Chain, Ptr, MachinePointerInfo());
+ Results.push_back(getSVESafeBitCast(OpVT, L, DAG));
+ }
+
+ return DAG.getMergeValues(Results, DL);
+ }
+
// Are multi-register zip instructions available?
if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
OpVT.getVectorElementType() != MVT::i1) {
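
Both new paths round-trip through a stack slot because only ld3/st3 understand a stride of three. The data movement of the deinterleave case, as a scalar sketch: the three operands are stored back-to-back (forming the interleaved stream a0,b0,c0,a1,...), and the ld3-style load then pulls out every third element:

    #include <array>
    #include <cstddef>

    template <std::size_t N>
    void deinterleave3(const std::array<int, 3 * N> &Stream,
                       std::array<int, N> &A, std::array<int, N> &B,
                       std::array<int, N> &C) {
      for (std::size_t I = 0; I < N; ++I) {
        A[I] = Stream[3 * I + 0]; // what ld3 puts in the first result
        B[I] = Stream[3 * I + 1]; // ...the second result
        C[I] = Stream[3 * I + 2]; // ...the third result
      }
    }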
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 24bef82..8e35ba7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "SIModeRegisterDefaults.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -27,6 +28,7 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
@@ -106,6 +108,7 @@ public:
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
mutable Function *LdexpF32 = nullptr;
+ mutable SmallVector<WeakVH> DeadVals;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
@@ -242,6 +245,8 @@ public:
Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
FastMathFlags FMF) const;
+ bool tryNarrowMathIfNoOverflow(Instruction *I);
+
public:
bool visitFDiv(BinaryOperator &I);
@@ -281,28 +286,21 @@ bool AMDGPUCodeGenPrepareImpl::run() {
BreakPhiNodesCache.clear();
bool MadeChange = false;
- Function::iterator NextBB;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; FI = NextBB) {
- BasicBlock *BB = &*FI;
- NextBB = std::next(FI);
-
- BasicBlock::iterator Next;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
- I = Next) {
- Next = std::next(I);
-
- MadeChange |= visit(*I);
-
- if (Next != E) { // Control flow changed
- BasicBlock *NextInstBB = Next->getParent();
- if (NextInstBB != BB) {
- BB = NextInstBB;
- E = BB->end();
- FE = F.end();
- }
- }
+ // Need to use make_early_inc_range because integer division expansion is
+  // handled by Transforms/Utils, and it can delete instructions such as the
+ // terminator of the BB.
+ for (BasicBlock &BB : reverse(F)) {
+ for (Instruction &I : make_early_inc_range(reverse(BB))) {
+ if (!isInstructionTriviallyDead(&I, TLI))
+ MadeChange |= visit(I);
}
}
+
+ while (!DeadVals.empty()) {
+ if (auto *I = dyn_cast_or_null<Instruction>(DeadVals.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ }
+
return MadeChange;
}
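
The reverse iteration plus DeadVals replaces the eraseFromParent() calls scattered through the visitors. WeakVH is what makes the deferred deletion safe: if a queued instruction is destroyed earlier as a side effect of deleting another entry, its handle nulls out and dyn_cast_or_null skips it. The idiom, as used above:

    // Visitors queue instead of erasing:
    //   I.replaceAllUsesWith(NewVal);
    //   DeadVals.push_back(&I);   // WeakVH; goes null if I dies first
    // and run() drains the queue once all rewrites are done:
    while (!DeadVals.empty()) {
      if (auto *I = dyn_cast_or_null<Instruction>(DeadVals.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
    }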
@@ -422,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
Value *NewVal = insertValues(Builder, Ty, ResultVals);
NewVal->takeName(&I);
I.replaceAllUsesWith(NewVal);
- I.eraseFromParent();
+ DeadVals.push_back(&I);
return true;
}
@@ -496,10 +494,10 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
FoldedT, FoldedF);
NewSelect->takeName(&BO);
BO.replaceAllUsesWith(NewSelect);
- BO.eraseFromParent();
+ DeadVals.push_back(&BO);
if (CastOp)
- CastOp->eraseFromParent();
- Sel->eraseFromParent();
+ DeadVals.push_back(CastOp);
+ DeadVals.push_back(Sel);
return true;
}
@@ -895,7 +893,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
if (NewVal) {
FDiv.replaceAllUsesWith(NewVal);
NewVal->takeName(&FDiv);
- RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI);
+ DeadVals.push_back(&FDiv);
}
return true;
@@ -1302,10 +1300,7 @@ it will create `s_and_b32 s0, s0, 0xff`.
We accept this change since the non-byte load assumes the upper bits
within the byte are all 0.
*/
-static bool tryNarrowMathIfNoOverflow(Instruction *I,
- const SITargetLowering *TLI,
- const TargetTransformInfo &TTI,
- const DataLayout &DL) {
+bool AMDGPUCodeGenPrepareImpl::tryNarrowMathIfNoOverflow(Instruction *I) {
unsigned Opc = I->getOpcode();
Type *OldType = I->getType();
@@ -1330,6 +1325,7 @@ static bool tryNarrowMathIfNoOverflow(Instruction *I,
NewType = I->getType()->getWithNewBitWidth(NewBit);
// Old cost
+ const TargetTransformInfo &TTI = TM.getTargetTransformInfo(F);
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Opc, OldType, TTI::TCK_RecipThroughput);
// New cost of new op
@@ -1360,7 +1356,7 @@ static bool tryNarrowMathIfNoOverflow(Instruction *I,
Value *Zext = Builder.CreateZExt(Arith, OldType);
I->replaceAllUsesWith(Zext);
- I->eraseFromParent();
+ DeadVals.push_back(I);
return true;
}
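
In scalar terms, the rewrite at the end of tryNarrowMathIfNoOverflow is: once value tracking proves both operands fit in the narrow width with no possibility of overflow, do the arithmetic narrow and zero-extend the result back. A minimal model (the no-overflow precondition is assumed here, not checked):

    #include <cstdint>

    // Assumes a prior known-bits query proved A, B < 2^31, so the 32-bit
    // add cannot wrap and the zero-extended result equals the 64-bit add.
    uint64_t narrowedAdd(uint64_t A, uint64_t B) {
      uint32_t Narrow = (uint32_t)A + (uint32_t)B; // the narrowed BinOp
      return (uint64_t)Narrow;                     // the CreateZExt back
    }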
@@ -1370,8 +1366,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (UseMul24Intrin && replaceMulWithMul24(I))
return true;
- if (tryNarrowMathIfNoOverflow(&I, ST.getTargetLowering(),
- TM.getTargetTransformInfo(F), DL))
+ if (tryNarrowMathIfNoOverflow(&I))
return true;
bool Changed = false;
@@ -1436,7 +1431,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (NewDiv) {
I.replaceAllUsesWith(NewDiv);
- I.eraseFromParent();
+ DeadVals.push_back(&I);
Changed = true;
}
}
@@ -1492,7 +1487,7 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
I.replaceAllUsesWith(ValOrig);
- I.eraseFromParent();
+ DeadVals.push_back(&I);
return true;
}
@@ -1534,7 +1529,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
+ DeadVals.push_back(&I);
return true;
}
@@ -1822,7 +1817,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
}
I.replaceAllUsesWith(Vec);
- I.eraseFromParent();
+ DeadVals.push_back(&I);
return true;
}
@@ -1903,7 +1898,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
auto *Intrin = B.CreateIntrinsic(
I.getType(), Intrinsic::amdgcn_addrspacecast_nonnull, {I.getOperand(0)});
I.replaceAllUsesWith(Intrin);
- I.eraseFromParent();
+ DeadVals.push_back(&I);
return true;
}
@@ -2000,16 +1995,10 @@ bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
Value *Fract = applyFractPat(Builder, FractArg);
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
-
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
+ DeadVals.push_back(&I);
return true;
}
-static bool isOneOrNegOne(const Value *Val) {
- const APFloat *C;
- return match(Val, m_APFloat(C)) && C->getExactLog2Abs() == 0;
-}
-
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Type *Ty = Sqrt.getType()->getScalarType();
@@ -2030,18 +2019,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
if (ReqdAccuracy < 1.0f)
return false;
- // FIXME: This is an ugly hack for this pass using forward iteration instead
- // of reverse. If it worked like a normal combiner, the rsq would form before
- // we saw a sqrt call.
- auto *FDiv =
- dyn_cast_or_null<FPMathOperator>(Sqrt.getUniqueUndroppableUser());
- if (FDiv && FDiv->getOpcode() == Instruction::FDiv &&
- FDiv->getFPAccuracy() >= 1.0f &&
- canOptimizeWithRsq(FPOp, FDiv->getFastMathFlags(), SqrtFMF) &&
- // TODO: We should also handle the arcp case for the fdiv with non-1 value
- isOneOrNegOne(FDiv->getOperand(0)))
- return false;
-
Value *SrcVal = Sqrt.getOperand(0);
bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt);
@@ -2065,7 +2042,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Value *NewSqrt = insertValues(Builder, Sqrt.getType(), ResultVals);
NewSqrt->takeName(&Sqrt);
Sqrt.replaceAllUsesWith(NewSqrt);
- Sqrt.eraseFromParent();
+ DeadVals.push_back(&Sqrt);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 73b2660..5407566 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -468,6 +468,38 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
MI.eraseFromParent();
}
+void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
+ Register Lo, Hi;
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_SMIN:
+ case AMDGPU::G_SMAX: {
+ // For signed operations, use sign extension
+ auto [Val0_Lo, Val0_Hi] = unpackSExt(MI.getOperand(1).getReg());
+ auto [Val1_Lo, Val1_Hi] = unpackSExt(MI.getOperand(2).getReg());
+ Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
+ .getReg(0);
+ Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
+ .getReg(0);
+ break;
+ }
+ case AMDGPU::G_UMIN:
+ case AMDGPU::G_UMAX: {
+ // For unsigned operations, use zero extension
+ auto [Val0_Lo, Val0_Hi] = unpackZExt(MI.getOperand(1).getReg());
+ auto [Val1_Lo, Val1_Hi] = unpackZExt(MI.getOperand(2).getReg());
+ Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
+ .getReg(0);
+ Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
+ .getReg(0);
+ break;
+ }
+ default:
+ llvm_unreachable("Unpack min/max lowering not implemented");
+ }
+ B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
+ MI.eraseFromParent();
+}
+
static bool isSignedBFE(MachineInstr &MI) {
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -654,6 +686,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
}
case UnpackBitShift:
return lowerUnpackBitShift(MI);
+ case UnpackMinMax:
+ return lowerUnpackMinMax(MI);
case Ext32To64: {
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
MachineInstrBuilder Hi;
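
lowerUnpackMinMax widens the uniform v2s16 case: each half is sign- or zero-extended to 32 bits, the scalar min/max runs twice on the SGPR bank, and the halves are repacked with a truncating build_vector. A scalar model of the signed-min case (plain C++, not the MIR builder calls):

    #include <algorithm>
    #include <cstdint>

    uint32_t smin_v2i16(uint32_t A, uint32_t B) {
      int32_t ALo = (int16_t)(A & 0xFFFF), AHi = (int16_t)(A >> 16); // unpackSExt
      int32_t BLo = (int16_t)(B & 0xFFFF), BHi = (int16_t)(B >> 16);
      uint16_t Lo = (uint16_t)std::min(ALo, BLo);  // G_SMIN on low halves
      uint16_t Hi = (uint16_t)std::min(AHi, BHi);  // G_SMIN on high halves
      return ((uint32_t)Hi << 16) | Lo;            // buildBuildVectorTrunc
    }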
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index 7affe5a..d937815 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -123,6 +123,7 @@ private:
void lowerSplitTo32(MachineInstr &MI);
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
+ void lowerUnpackMinMax(MachineInstr &MI);
};
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index f413bbc..7392f4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -522,6 +522,22 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
+ addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
+ .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
+
+ addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
+ .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
+
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
// and G_FREEZE here, rest is trivially regbankselected earlier
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index d0c6910..93e0efd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -212,6 +212,7 @@ enum LoweringMethodID {
VccExtToSel,
UniExtToSel,
UnpackBitShift,
+ UnpackMinMax,
S_BFE,
V_BFE,
VgprToVccCopy,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 557d87f..56807a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5053,16 +5053,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
//
// vdst, srcA, srcB, srcC
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ bool UseAGPRForm = !Subtarget.hasGFX90AInsts() ||
+ Info->selectAGPRFormMFMA(MinNumRegsRequired);
+
OpdsMapping[0] =
- Info->getMinNumAGPRs() >= MinNumRegsRequired
- ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
- : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ UseAGPRForm ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] =
- Info->getMinNumAGPRs() >= MinNumRegsRequired
- ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
- : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
@@ -5115,11 +5117,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: {
+ Register DstReg = MI.getOperand(0).getReg();
+ unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ unsigned MinNumRegsRequired = DstSize / 32;
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ bool UseAGPRForm = Info->selectAGPRFormMFMA(MinNumRegsRequired);
+
// vdst, srcA, srcB, srcC, idx
- OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = UseAGPRForm ? getAGPROpMapping(DstReg, MRI, *TRI)
+ : getVGPROpMapping(DstReg, MRI, *TRI);
+
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
- OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[4] =
+ UseAGPRForm ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index ef63acc..71494be 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -905,7 +905,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
OS << ":\n";
SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
- SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB);
+ SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
GCNRPTracker::LiveRegSet LiveIn, LiveOut;
GCNRegPressure RPAtMBBEnd;
@@ -931,7 +931,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
} else {
GCNUpwardRPTracker RPT(LIS);
- RPT.reset(MRI, MBBEndSlot);
+ RPT.reset(MRI, MBBLastSlot);
LiveOut = RPT.getLiveRegs();
RPAtMBBEnd = RPT.getPressure();
@@ -966,14 +966,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
if (UseDownwardTracker)
- ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI));
+ ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBLastSlot, LIS, MRI));
GCNRPTracker::LiveRegSet LiveThrough;
for (auto [Reg, Mask] : LiveIn) {
LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
if (MaskIntersection.any()) {
LaneBitmask LTMask = getRegLiveThroughMask(
- MRI, LIS, Reg, MBBStartSlot, MBBEndSlot, MaskIntersection);
+ MRI, LIS, Reg, MBBStartSlot, MBBLastSlot, MaskIntersection);
if (LTMask.any())
LiveThrough[Reg] = LTMask;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index a9c58bb..898d1ff 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -313,8 +313,8 @@ public:
/// reset tracker to the end of the \p MBB.
void reset(const MachineBasicBlock &MBB) {
- reset(MBB.getParent()->getRegInfo(),
- LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
+ SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
+ reset(MBB.getParent()->getRegInfo(), MBBLastSlot);
}
/// reset tracker to the point just after \p MI (in program order).
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 730be69..80e985d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -103,52 +103,52 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::Untyped, V64RegClass);
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
- addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
+ addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
- addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
- addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
+ addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
- addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
+ addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
- addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
+ addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
- addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
+ addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
- addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
- addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
+ addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
- addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
+ addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
- addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
+ addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
- addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
+ addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
- addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
+ addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
- addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
- addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
+ addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
- addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
+ addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
if (Subtarget->has16BitInsts()) {
if (Subtarget->useRealTrue16Insts()) {
@@ -180,7 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
- addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
+ addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
computeRegisterProperties(Subtarget->getRegisterInfo());
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index b7dbb59..2c1a13c 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1202,6 +1202,12 @@ public:
unsigned getMinNumAGPRs() const { return MinNumAGPRs; }
+ /// Return true if an MFMA that requires at least \p NumRegs should select to
+ /// the AGPR form instead of the VGPR form.
+ bool selectAGPRFormMFMA(unsigned NumRegs) const {
+ return !MFMAVGPRForm && getMinNumAGPRs() >= NumRegs;
+ }
+
// \returns true if a function has a use of AGPRs via inline asm or
// has a call which may use them.
bool mayUseAGPRs(const Function &F) const;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 7cfd059..6500fce 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -964,14 +964,12 @@ class MAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false> : Pa
class CanUseAGPR_MAI<ValueType vt> {
code PredicateCode = [{
return !Subtarget->hasGFX90AInsts() ||
- (!SIMachineFunctionInfo::MFMAVGPRForm &&
- MF->getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
- }] # !srl(vt.Size, 5) # ");";
+ MF->getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA(
+ }] # !srl(vt.Size, 5) # ");";
code GISelPredicateCode = [{
return !Subtarget->hasGFX90AInsts() ||
- (!SIMachineFunctionInfo::MFMAVGPRForm &&
- MF.getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
+ MF.getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA(
}] # !srl(vt.Size, 5) # ");";
}
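
Taken together, the two hunks above fold the inline AGPR-budget check into the new selectAGPRFormMFMA helper. For a 256-bit accumulator type (!srl(256, 5) == 8), the TableGen predicate expands to C++ of roughly this shape; only the return expression is generated code, the enclosing function and its name are hypothetical:

// Illustrative expansion of CanUseAGPR_MAI<vt> for a 256-bit vt
// (assumes the AMDGPU headers; the wrapper function is not part of the diff).
static bool canUseAGPRForMAI256(const GCNSubtarget *Subtarget,
                                const MachineFunction *MF) {
  return !Subtarget->hasGFX90AInsts() ||
         MF->getInfo<SIMachineFunctionInfo>()->selectAGPRFormMFMA(8);
}
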
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2a40fb9..83c7def 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -42,7 +42,6 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index a0acfcf..85ce944 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -699,35 +699,20 @@ def: OpR_RR_pat<C2_cmpgtp, setgt, i1, I64>;
def: OpR_RR_pat<C2_cmpgtup, setugt, i1, I64>;
def: OpR_RR_pat<C2_cmpgtp, RevCmp<setlt>, i1, I64>;
def: OpR_RR_pat<C2_cmpgtup, RevCmp<setult>, i1, I64>;
-def: OpR_RR_pat<A2_vcmpbeq, seteq, i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbeq, seteq, v8i1, V8I8>;
-def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, v8i1, V8I8>;
-def: OpR_RR_pat<A4_vcmpbgt, setgt, i1, V8I8>;
def: OpR_RR_pat<A4_vcmpbgt, setgt, v8i1, V8I8>;
-def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, v8i1, V8I8>;
-def: OpR_RR_pat<A2_vcmpbgtu, setugt, i1, V8I8>;
def: OpR_RR_pat<A2_vcmpbgtu, setugt, v8i1, V8I8>;
-def: OpR_RR_pat<A2_vcmpheq, seteq, i1, V4I16>;
def: OpR_RR_pat<A2_vcmpheq, seteq, v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgt, setgt, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgt, setgt, v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmphgtu, setugt, i1, V4I16>;
def: OpR_RR_pat<A2_vcmphgtu, setugt, v4i1, V4I16>;
-def: OpR_RR_pat<A2_vcmpweq, seteq, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpweq, seteq, v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgt, setgt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgt, setgt, v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
-def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
@@ -1213,12 +1198,6 @@ def: OpR_RI_pat<S2_asl_i_r, Shl, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S2_asr_i_p, Sra, i64, I64, u6_0ImmPred>;
def: OpR_RI_pat<S2_lsr_i_p, Srl, i64, I64, u6_0ImmPred>;
def: OpR_RI_pat<S2_asl_i_p, Shl, i64, I64, u6_0ImmPred>;
-def: OpR_RI_pat<S2_asr_i_vh, Sra, v4i16, V4I16, u4_0ImmPred>;
-def: OpR_RI_pat<S2_lsr_i_vh, Srl, v4i16, V4I16, u4_0ImmPred>;
-def: OpR_RI_pat<S2_asl_i_vh, Shl, v4i16, V4I16, u4_0ImmPred>;
-def: OpR_RI_pat<S2_asr_i_vh, Sra, v2i32, V2I32, u5_0ImmPred>;
-def: OpR_RI_pat<S2_lsr_i_vh, Srl, v2i32, V2I32, u5_0ImmPred>;
-def: OpR_RI_pat<S2_asl_i_vh, Shl, v2i32, V2I32, u5_0ImmPred>;
def: OpR_RR_pat<S2_asr_r_r, Sra, i32, I32, I32>;
def: OpR_RR_pat<S2_lsr_r_r, Srl, i32, I32, I32>;
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ba70c9e..97379d7 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -3677,7 +3677,7 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
Out, STI))
return true;
- if (IsLikely) {
+ if (IsLikely && MemOffsetOp.isExpr()) {
TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg,
MCOperand::createExpr(MemOffsetOp.getExpr()), IDLoc, STI);
TOut.emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index eff80e5..21d8ded 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -855,6 +855,16 @@ def calltarget : Operand<iPTR> {
def imm64: Operand<i64>;
+def ConstantImmAsmOperandClass : AsmOperandClass {
+ let Name = "ConstantImm";
+ let PredicateMethod = "isConstantImm";
+ let RenderMethod = "addImmOperands";
+}
+
+def ConstantImm64: Operand<i64> {
+ let ParserMatchClass = ConstantImmAsmOperandClass;
+}
+
def simm19_lsl2 : Operand<i32> {
let EncoderMethod = "getSimm19Lsl2Encoding";
let DecoderMethod = "DecodeSimm19Lsl2";
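
The new ConstantImm64 operand only changes assembler matching: isConstantImm accepts immediates that fold to a compile-time constant and rejects unresolved symbolic expressions, so the bne/beq-immediate pseudos updated below no longer match a symbol where the immediate belongs. A minimal sketch of such a predicate, hedged — the real MipsOperand method may differ in detail:

#include "llvm/MC/MCExpr.h"

// Sketch: an operand is a "constant immediate" if its expression evaluates
// to an absolute value, which is what rules out unresolved symbols.
static bool isConstantImmSketch(const llvm::MCExpr *Imm) {
  int64_t Value;
  return Imm && Imm->evaluateAsAbsolute(Value);
}
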
@@ -2947,10 +2957,10 @@ def : MipsInstAlias<"nor\t$rs, $imm", (NORImm GPR32Opnd:$rs, GPR32Opnd:$rs,
let hasDelaySlot = 1, isCTI = 1 in {
def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
- (ins imm64:$imm64, brtarget:$offset),
+ (ins ConstantImm64:$imm64, brtarget:$offset),
"bne\t$rt, $imm64, $offset">;
def BeqImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
- (ins imm64:$imm64, brtarget:$offset),
+ (ins ConstantImm64:$imm64, brtarget:$offset),
"beq\t$rt, $imm64, $offset">;
class CondBranchPseudo<string instr_asm> :
@@ -2978,7 +2988,7 @@ def BGTUL: CondBranchPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
let isCTI = 1 in
class CondBranchImmPseudo<string instr_asm> :
- MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
+ MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, ConstantImm64:$imm, brtarget:$offset),
!strconcat(instr_asm, "\t$rs, $imm, $offset")>;
def BEQLImmMacro : CondBranchImmPseudo<"beql">, ISA_MIPS2_NOT_32R6_64R6;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 40c05e8..5ceb477 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1520,6 +1520,8 @@ def HasVendorXqcics
: Predicate<"Subtarget->hasVendorXqcics()">,
AssemblerPredicate<(all_of FeatureVendorXqcics),
"'Xqcics' (Qualcomm uC Conditional Select Extension)">;
+def NoVendorXqcics
+ : Predicate<"!Subtarget->hasVendorXqcics()">;
def FeatureVendorXqcicsr
: RISCVExperimentalExtension<0, 4, "Qualcomm uC CSR Extension">;
@@ -1823,6 +1825,11 @@ def TuneConditionalCompressedMoveFusion
def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">;
+def TuneHasSingleElementVecFP64
+ : SubtargetFeature<"single-element-vec-fp64", "HasSingleElementVectorFP64", "true",
+ "Certain vector FP64 operations produce a single result "
+ "element per cycle">;
+
def TuneMIPSP8700
: SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700",
"MIPS p8700 processor">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index f2724c41..5e1d07a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1571,35 +1571,42 @@ def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5nonzero>;
}
let Predicates = [HasVendorXqcicli, IsRV32] in {
-def : QCILICCPat<SETEQ, QC_LIEQ>;
-def : QCILICCPat<SETNE, QC_LINE>;
def : QCILICCPat<SETLT, QC_LILT>;
def : QCILICCPat<SETGE, QC_LIGE>;
def : QCILICCPat<SETULT, QC_LILTU>;
def : QCILICCPat<SETUGE, QC_LIGEU>;
-def : QCILICCIPat<SETEQ, QC_LIEQI, simm5>;
-def : QCILICCIPat<SETNE, QC_LINEI, simm5>;
def : QCILICCIPat<SETLT, QC_LILTI, simm5>;
def : QCILICCIPat<SETGE, QC_LIGEI, simm5>;
def : QCILICCIPat<SETULT, QC_LILTUI, uimm5>;
def : QCILICCIPat<SETUGE, QC_LIGEUI, uimm5>;
-def : QCILICCPatInv<SETNE, QC_LIEQ>;
-def : QCILICCPatInv<SETEQ, QC_LINE>;
def : QCILICCPatInv<SETGE, QC_LILT>;
def : QCILICCPatInv<SETLT, QC_LIGE>;
def : QCILICCPatInv<SETUGE, QC_LILTU>;
def : QCILICCPatInv<SETULT, QC_LIGEU>;
-def : QCILICCIPatInv<SETNE, QC_LIEQI, simm5>;
-def : QCILICCIPatInv<SETEQ, QC_LINEI, simm5>;
def : QCILICCIPatInv<SETGE, QC_LILTI, simm5>;
def : QCILICCIPatInv<SETLT, QC_LIGEI, simm5>;
def : QCILICCIPatInv<SETUGE, QC_LILTUI, uimm5>;
def : QCILICCIPatInv<SETULT, QC_LIGEUI, uimm5>;
} // Predicates = [HasVendorXqcicli, IsRV32]
+// Prioritize Xqcics over these patterns.
+let Predicates = [HasVendorXqcicli, NoVendorXqcics, IsRV32] in {
+def : QCILICCPat<SETEQ, QC_LIEQ>;
+def : QCILICCPat<SETNE, QC_LINE>;
+
+def : QCILICCIPat<SETEQ, QC_LIEQI, simm5>;
+def : QCILICCIPat<SETNE, QC_LINEI, simm5>;
+
+def : QCILICCPatInv<SETNE, QC_LIEQ>;
+def : QCILICCPatInv<SETEQ, QC_LINE>;
+
+def : QCILICCIPatInv<SETNE, QC_LIEQI, simm5>;
+def : QCILICCIPatInv<SETEQ, QC_LINEI, simm5>;
+} // Predicates = [HasVendorXqcicli, NoVendorXqcics, IsRV32]
+
let Predicates = [HasVendorXqcics, IsRV32] in {
// (SELECT X, Y, Z) is canonicalised to `(riscv_selectcc x, 0, NE, y, z)`.
// These exist to prioritise over the `Select_GPR_Using_CC_GPR` pattern.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 6d86aff..3658817 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -14,6 +14,10 @@
// otherwise.
def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
+// This scheduling predicate is true when subtarget feature TuneHasSingleElementVecFP64
+// is enabled.
+def SingleElementVecFP64SchedPred : FeatureSchedPredicate<TuneHasSingleElementVecFP64>;
+
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
def isSEXT_W
: TIIPredicate<"isSEXT_W",
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 17a7948..e86431f 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -338,7 +338,8 @@ def SIFIVE_X390 : RISCVProcessorModel<"sifive-x390",
FeatureStdExtZvl1024b,
FeatureVendorXSiFivecdiscarddlone,
FeatureVendorXSiFivecflushdlone],
- SiFiveIntelligenceTuneFeatures>;
+ !listconcat(SiFiveIntelligenceTuneFeatures,
+ [TuneHasSingleElementVecFP64])>;
defvar SiFiveP400TuneFeatures = [TuneNoDefaultUnroll,
TuneConditionalCompressedMoveFusion,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 3e07eff..f863392a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN,
ProcResourceKind VL, ProcResourceKind VS,
ProcResourceKind VCQ,
SiFive7FPLatencies fpLatencies,
- bit isFP64Throttled = false,
bit hasFastGather = false> {
// Branching
@@ -832,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN,
// 13. Vector Floating-Point Instructions
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)),
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
- SiFive7GetCyclesDefault<mx>.c);
- defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8);
- defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2);
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
- }
- defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4);
- let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA], mx, sew, IsWorstCase>;
- // min max require merge
- defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
+ if !eq(sew, 64) then {
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF",
+ "WriteVFMulAddV", "WriteVFMulAddF"] in
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+ mx, sew, IsWorstCase>;
+ foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+ mx, sew, IsWorstCase>;
+ foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+ mx, sew, IsWorstCase>;
+ foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+ mx, sew, IsWorstCase>;
+ } else {
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
+ defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ }
+ let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
+ defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+ // min/max require a merge
+ defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
+ }
}
}
}
@@ -892,19 +918,28 @@ multiclass SiFive7WriteResBase<int VLEN,
// Widening
foreach mx = SchedMxListW in {
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
- SiFive7GetCyclesDefault<mx>.c);
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
- defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
+ if !eq(sew, 32) then {
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+ mx, sew, IsWorstCase>;
+ } else {
+ let Latency = 8,
+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
- defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
@@ -912,11 +947,19 @@ multiclass SiFive7WriteResBase<int VLEN,
defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
}
- defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
- SiFive7GetCyclesDefault<mx>.c);
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ if !eq(sew, 32) then {
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+ mx, sew, IsWorstCase>;
+ } else {
+ let Latency = 8,
+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
+ defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ }
}
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
@@ -933,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN,
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
- defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
- SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
- SiFive7GetCyclesNarrowing<mx>.c);
defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
- defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c;
+ if !eq(sew, 32) then {
+ defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+ foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
+ defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+ // Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+ // Not Predicated
+ [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+ mx, sew, IsWorstCase>;
+ } else {
+ let Latency = 8,
+ AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
+ defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+ }
}
}
}
@@ -1499,7 +1552,6 @@ multiclass SiFive7ReadAdvance {
/// eventually be supplied by different SchedMachineModels.
multiclass SiFive7SchedResources<int vlen, bit extraVALU,
SiFive7FPLatencies fpLatencies,
- bit isFP64Throttled,
bit hasFastGather> {
defm SiFive7 : SiFive7ProcResources<extraVALU>;
@@ -1527,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
: SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB, SiFive7PipeAB,
SiFive7IDiv, SiFive7FDiv, SiFive7VA1,
SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
- SiFive7VCQ, fpLatencies, isFP64Throttled,
- hasFastGather>;
+ SiFive7VCQ, fpLatencies, hasFastGather>;
//===----------------------------------------------------------------------===//
// Bypass and advance
@@ -1560,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
bit HasExtraVALU = false;
SiFive7FPLatencies FPLatencies;
- bit IsFP64Throttled = false;
bit HasFastGather = false;
string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
@@ -1587,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
let HasExtraVALU = true;
let FPLatencies = SiFive7LowFPLatencies;
- let IsFP64Throttled = true;
let HasFastGather = true;
}
@@ -1596,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
let SchedModel = model in
defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
model.FPLatencies,
- model.IsFP64Throttled,
model.HasFastGather>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 01a4308..d11b446 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -128,6 +128,22 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
IsWorstCase>;
}
+multiclass LMULSEWWriteResMXSEWVariant<string name, SchedPredicateBase Pred,
+ list<ProcResourceKind> predResources,
+ int predLat, list<int> predAcquireCycles,
+ list<int> predReleaseCycles,
+ list<ProcResourceKind> noPredResources,
+ int noPredLat, list<int> noPredAcquireCycles,
+ list<int> noPredReleaseCycles,
+ string mx, int sew, bit IsWorstCase> {
+ defm "" : LMULWriteResVariantImpl<name, name # "_" # mx # "_E" # sew, Pred, predResources,
+ predLat, predAcquireCycles,
+ predReleaseCycles, noPredResources,
+ noPredLat, noPredAcquireCycles,
+ noPredReleaseCycles,
+ IsWorstCase>;
+}
+
// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
// SchedMxList variants above. Each multiclass is responsible for defining
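
Conceptually, the LMULSEWWriteResMXSEWVariant multiclass above emits a SchedWriteVariant per (LMUL, SEW) pair that selects between two WriteRes tuples based on the scheduling predicate, which is how the SiFive7 model earlier in this diff replaces the compile-time isFP64Throttled bit. A rough runtime analogue, with illustrative numbers not taken from the model:

#include <cstdio>

// Sketch: the feature predicate selects the one-element-per-cycle tuple
// over the default LMUL-based tuple (values here are made up).
struct WriteParams { unsigned Latency, ReleaseAtCycles; };

static WriteParams resolveWrite(bool SingleElementVecFP64,
                                WriteParams Predicated, WriteParams Default) {
  return SingleElementVecFP64 ? Predicated : Default;
}

int main() {
  WriteParams P = resolveWrite(/*SingleElementVecFP64=*/true,
                               /*Predicated=*/{23, 17}, /*Default=*/{8, 5});
  std::printf("latency=%u, release=%u\n", P.Latency, P.ReleaseAtCycles);
}
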
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index e8c849e..28a1690 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -46,7 +46,6 @@
#include "SPIRVSubtarget.h"
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
index 20f03b0..60d39c9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp
index 278ad7c..e621bcd44 100644
--- a/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVStripConvergentIntrinsics.cpp
@@ -14,7 +14,6 @@
#include "SPIRV.h"
#include "SPIRVSubtarget.h"
#include "SPIRVUtils.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Transforms/Utils/Cloning.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
index 1811492..5b149f8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9580ade..1cfcb1f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28,7 +28,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 3bc46af..6dd43b2 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -547,7 +547,7 @@ unsigned X86TargetLowering::getAddressSpace() const {
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
- (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
+ TargetTriple.isAndroid();
}
static Constant* SegmentOffset(IRBuilderBase &IRB,
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index cfdfd94..5066a99 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1033,19 +1033,17 @@ private:
};
} // namespace
-namespace llvm {
template <>
-struct DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<typename CallsiteContextGraph<
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
: public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
template <>
-struct DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<typename CallsiteContextGraph<
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
: public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
-struct DenseMapInfo<IndexCall>
+struct llvm::DenseMapInfo<IndexCall>
: public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
-} // end namespace llvm
namespace {
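
This hunk, like the WholeProgramDevirt, InstCombinePHI, EarlyCSE, and GVN hunks below, switches explicit specializations from reopening namespace llvm to the qualified llvm::DenseMapInfo<...> form. Since C++17 an explicit specialization may be declared outside its namespace using a qualified name; a self-contained illustration, independent of LLVM:

#include <cstdio>

namespace lib {
template <typename T> struct Traits {
  static constexpr const char *name() { return "generic"; }
};
} // namespace lib

// C++17: specialize lib::Traits without reopening namespace lib.
template <> struct lib::Traits<int> {
  static constexpr const char *name() { return "int"; }
};

int main() { std::puts(lib::Traits<int>::name()); } // prints "int"
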
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index ac41fdd..2d5cb82 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -372,9 +372,7 @@ struct VTableSlot {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<VTableSlot> {
+template <> struct llvm::DenseMapInfo<VTableSlot> {
static VTableSlot getEmptyKey() {
return {DenseMapInfo<Metadata *>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
@@ -393,7 +391,7 @@ template <> struct DenseMapInfo<VTableSlot> {
}
};
-template <> struct DenseMapInfo<VTableSlotSummary> {
+template <> struct llvm::DenseMapInfo<VTableSlotSummary> {
static VTableSlotSummary getEmptyKey() {
return {DenseMapInfo<StringRef>::getEmptyKey(),
DenseMapInfo<uint64_t>::getEmptyKey()};
@@ -412,8 +410,6 @@ template <> struct DenseMapInfo<VTableSlotSummary> {
}
};
-} // end namespace llvm
-
// Returns true if the function must be unreachable based on ValueInfo.
//
// In particular, identifies a function as unreachable in the following
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9b272c4..3ddf182 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -28,6 +28,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
/// This is the complement of getICmpCode, which turns an opcode and two
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
@@ -1272,7 +1276,8 @@ Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) {
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd, bool IsLogical,
InstCombiner::BuilderTy &Builder,
- const SimplifyQuery &Q) {
+ const SimplifyQuery &Q,
+ Instruction &I) {
// Match an equality compare with a non-poison constant as Cmp0.
// Also, give up if the compare can be constant-folded to avoid looping.
CmpPredicate Pred0;
@@ -1306,9 +1311,12 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
}
- if (IsLogical)
- return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp)
- : Builder.CreateLogicalOr(Cmp0, SubstituteCmp);
+ if (IsLogical) {
+ Instruction *MDFrom =
+ ProfcheckDisableMetadataFixes && isa<SelectInst>(I) ? nullptr : &I;
+ return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp, "", MDFrom)
+ : Builder.CreateLogicalOr(Cmp0, SubstituteCmp, "", MDFrom);
+ }
return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
SubstituteCmp);
}
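
A logical and/or is emitted as a short-circuiting select, so it can carry !prof branch weights; the extra MDFrom argument tells the builder which instruction to copy that metadata from (suppressed for selects under ProfcheckDisableMetadataFixes, per the hunk above). A minimal sketch of the call shape this diff uses:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: build `select i1 A, i1 B, i1 false` (a logical and), copying
// profile metadata from I — the overload used in the hunk above.
static Value *logicalAndWithProf(IRBuilderBase &Builder, Value *A, Value *B,
                                 Instruction &I) {
  return Builder.CreateLogicalAnd(A, B, "", /*MDFrom=*/&I);
}
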
@@ -3396,13 +3404,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
/*IsLogical*/ false, Builder))
return V;
- if (Value *V =
- foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q))
+ if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical,
+ Builder, Q, I))
return V;
// We can convert this case to bitwise and, because both operands are used
// on the LHS, and as such poison from both will propagate.
- if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd,
- /*IsLogical=*/false, Builder, Q)) {
+ if (Value *V = foldAndOrOfICmpsWithConstEq(
+ RHS, LHS, IsAnd, /*IsLogical=*/false, Builder, Q, I)) {
// If RHS is still used, we should drop samesign flag.
if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) {
RHS->setSameSign(false);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 56194fe..4c9b10a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2202,6 +2202,11 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return commonCastTransforms(CI);
}
+Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) {
+ // FIXME: Implement variants of ptrtoint folds.
+ return commonCastTransforms(CI);
+}
+
/// This input value (which is known to have vector type) is being zero extended
/// or truncated to the specified vector type. Since the zext/trunc is done
/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index e01c145..218aaf9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -143,6 +143,7 @@ public:
Instruction *visitUIToFP(CastInst &CI);
Instruction *visitSIToFP(CastInst &CI);
Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitPtrToAddr(PtrToAddrInst &CI);
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 5c747bb..9815644 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1069,27 +1069,22 @@ struct LoweredPHIRecord {
};
} // namespace
-namespace llvm {
- template<>
- struct DenseMapInfo<LoweredPHIRecord> {
- static inline LoweredPHIRecord getEmptyKey() {
- return LoweredPHIRecord(nullptr, 0);
- }
- static inline LoweredPHIRecord getTombstoneKey() {
- return LoweredPHIRecord(nullptr, 1);
- }
- static unsigned getHashValue(const LoweredPHIRecord &Val) {
- return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
- (Val.Width>>3);
- }
- static bool isEqual(const LoweredPHIRecord &LHS,
- const LoweredPHIRecord &RHS) {
- return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
- LHS.Width == RHS.Width;
- }
- };
-} // namespace llvm
-
+template <> struct llvm::DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(nullptr, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(nullptr, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode *>::getHashValue(Val.PN) ^ (Val.Shift >> 3) ^
+ (Val.Width >> 3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width;
+ }
+};
/// This is an integer PHI and we know that it has an illegal type: see if it is
/// only used by trunc or trunc(lshr) operations. If so, we split the PHI into
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3704ad7..860f8f7 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -600,9 +600,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
!IsRISCV64 && !IsLoongArch64 &&
!(Mapping.Offset & (Mapping.Offset - 1)) &&
Mapping.Offset != kDynamicShadowSentinel;
- bool IsAndroidWithIfuncSupport =
- IsAndroid && !TargetTriple.isAndroidVersionLT(21);
- Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb;
+ Mapping.InGlobal = ClWithIfunc && IsAndroid && IsArmOrThumb;
return Mapping;
}
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 3f7003d..e5935f4 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -148,19 +148,16 @@ public:
class DFAJumpThreading {
public:
- DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
+ DFAJumpThreading(AssumptionCache *AC, DomTreeUpdater *DTU, LoopInfo *LI,
TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
- : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
+ : AC(AC), DTU(DTU), LI(LI), TTI(TTI), ORE(ORE) {}
bool run(Function &F);
bool LoopInfoBroken;
private:
void
- unfoldSelectInstrs(DominatorTree *DT,
- const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
- // TODO: Have everything use a single lazy DTU
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ unfoldSelectInstrs(const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
while (!Stack.empty()) {
@@ -168,7 +165,7 @@ private:
std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
- unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
+ unfold(DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
// Put newly discovered select instructions into the work list.
llvm::append_range(Stack, NewSIsToUnfold);
@@ -181,7 +178,7 @@ private:
std::vector<BasicBlock *> *NewBBs);
AssumptionCache *AC;
- DominatorTree *DT;
+ DomTreeUpdater *DTU;
LoopInfo *LI;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
@@ -389,6 +386,22 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
return OS;
}
+/// Helper to get the successor corresponding to a particular case value for
+/// a switch statement.
+static BasicBlock *getNextCaseSuccessor(SwitchInst *Switch,
+ const APInt &NextState) {
+ BasicBlock *NextCase = nullptr;
+ for (auto Case : Switch->cases()) {
+ if (Case.getCaseValue()->getValue() == NextState) {
+ NextCase = Case.getCaseSuccessor();
+ break;
+ }
+ }
+ if (!NextCase)
+ NextCase = Switch->getDefaultDest();
+ return NextCase;
+}
+
namespace {
/// ThreadingPath is a path in the control flow of a loop that can be threaded
/// by cloning necessary basic blocks and replacing conditional branches with
@@ -401,6 +414,10 @@ struct ThreadingPath {
ExitVal = V->getValue();
IsExitValSet = true;
}
+ void setExitValue(const APInt &V) {
+ ExitVal = V;
+ IsExitValSet = true;
+ }
bool isExitValueSet() const { return IsExitValSet; }
/// Determinator is the basic block that determines the next state of the DFA.
@@ -583,44 +600,8 @@ struct AllSwitchPaths {
BasicBlock *getSwitchBlock() { return SwitchBlock; }
void run() {
- StateDefMap StateDef = getStateDefMap();
- if (StateDef.empty()) {
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
- Switch)
- << "Switch instruction is not predictable.";
- });
- return;
- }
-
- auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
- auto *SwitchPhiDefBB = SwitchPhi->getParent();
- VisitedBlocks VB;
- // Get paths from the determinator BBs to SwitchPhiDefBB
- std::vector<ThreadingPath> PathsToPhiDef =
- getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
- if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
- TPaths = std::move(PathsToPhiDef);
- return;
- }
-
- assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
- auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
- // Find and append paths from SwitchPhiDefBB to SwitchBlock.
- PathsType PathsToSwitchBB =
- paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
- if (PathsToSwitchBB.empty())
- return;
-
- std::vector<ThreadingPath> TempList;
- for (const ThreadingPath &Path : PathsToPhiDef) {
- for (const PathType &PathToSw : PathsToSwitchBB) {
- ThreadingPath PathCopy(Path);
- PathCopy.appendExcludingFirst(PathToSw);
- TempList.push_back(PathCopy);
- }
- }
- TPaths = std::move(TempList);
+ findTPaths();
+ unifyTPaths();
}
private:
@@ -812,6 +793,69 @@ private:
return Res;
}
+ // Find all threadable paths.
+ void findTPaths() {
+ StateDefMap StateDef = getStateDefMap();
+ if (StateDef.empty()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
+ Switch)
+ << "Switch instruction is not predictable.";
+ });
+ return;
+ }
+
+ auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
+ auto *SwitchPhiDefBB = SwitchPhi->getParent();
+ VisitedBlocks VB;
+ // Get paths from the determinator BBs to SwitchPhiDefBB
+ std::vector<ThreadingPath> PathsToPhiDef =
+ getPathsFromStateDefMap(StateDef, SwitchPhi, VB, MaxNumPaths);
+ if (SwitchPhiDefBB == SwitchBlock || PathsToPhiDef.empty()) {
+ TPaths = std::move(PathsToPhiDef);
+ return;
+ }
+
+ assert(MaxNumPaths >= PathsToPhiDef.size() && !PathsToPhiDef.empty());
+ auto PathsLimit = MaxNumPaths / PathsToPhiDef.size();
+ // Find and append paths from SwitchPhiDefBB to SwitchBlock.
+ PathsType PathsToSwitchBB =
+ paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1, PathsLimit);
+ if (PathsToSwitchBB.empty())
+ return;
+
+ std::vector<ThreadingPath> TempList;
+ for (const ThreadingPath &Path : PathsToPhiDef) {
+ for (const PathType &PathToSw : PathsToSwitchBB) {
+ ThreadingPath PathCopy(Path);
+ PathCopy.appendExcludingFirst(PathToSw);
+ TempList.push_back(PathCopy);
+ }
+ }
+ TPaths = std::move(TempList);
+ }
+
+ // Two states are equivalent if they have the same switch destination.
+ // Unify the states in different threading paths if the states are equivalent.
+ void unifyTPaths() {
+ llvm::SmallDenseMap<BasicBlock *, APInt> DestToState;
+ for (ThreadingPath &Path : TPaths) {
+ APInt NextState = Path.getExitValue();
+ BasicBlock *Dest = getNextCaseSuccessor(Switch, NextState);
+ auto StateIt = DestToState.find(Dest);
+ if (StateIt == DestToState.end()) {
+ DestToState.insert({Dest, NextState});
+ continue;
+ }
+
+ if (NextState != StateIt->second) {
+ LLVM_DEBUG(dbgs() << "Next state in " << Path << " is equivalent to "
+ << StateIt->second << "\n");
+ Path.setExitValue(StateIt->second);
+ }
+ }
+ }
+
unsigned NumVisited = 0;
SwitchInst *Switch;
BasicBlock *SwitchBlock;
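
A concrete instance of the unification in unifyTPaths above: if cases 1 and 2 of the switch branch to the same block, paths exiting with states 1 and 2 are interchangeable, and the later one is rewritten to the state first seen for that destination. A standalone sketch with plain STL types standing in for LLVM's (values and names are made up):

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  // Cases 1 and 2 branch to the same block; case 3 goes elsewhere.
  std::map<int, std::string> CaseDest{{1, "bb.a"}, {2, "bb.a"}, {3, "bb.b"}};
  std::vector<int> PathExitStates{1, 2, 3};

  std::map<std::string, int> DestToState;
  for (int &State : PathExitStates) {
    auto [It, Inserted] = DestToState.try_emplace(CaseDest[State], State);
    if (!Inserted)
      State = It->second; // unify with the first state seen for this dest
  }
  assert((PathExitStates == std::vector<int>{1, 1, 3}));
}
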
@@ -822,11 +866,11 @@ private:
};
struct TransformDFA {
- TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
+ TransformDFA(AllSwitchPaths *SwitchPaths, DomTreeUpdater *DTU,
AssumptionCache *AC, TargetTransformInfo *TTI,
OptimizationRemarkEmitter *ORE,
SmallPtrSet<const Value *, 32> EphValues)
- : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
+ : SwitchPaths(SwitchPaths), DTU(DTU), AC(AC), TTI(TTI), ORE(ORE),
EphValues(EphValues) {}
bool run() {
@@ -1002,19 +1046,16 @@ private:
SmallPtrSet<BasicBlock *, 16> BlocksToClean;
BlocksToClean.insert_range(successors(SwitchBlock));
- {
- DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
- for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
- createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
- NumPaths++;
- }
-
- // After all paths are cloned, now update the last successor of the cloned
- // path so it skips over the switch statement
- for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
- updateLastSuccessor(TPath, DuplicateMap, &DTU);
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
+ createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, DTU);
+ NumPaths++;
}
+ // After all paths are cloned, update the last successor of each cloned
+ // path so that it skips over the switch statement.
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
+ updateLastSuccessor(TPath, DuplicateMap, DTU);
+
// For each instruction that was cloned and used outside, update its uses
updateSSA(NewDefs);
@@ -1118,7 +1159,7 @@ private:
}
// SSAUpdater handles phi placement and renaming uses with the appropriate
// value.
- SSAUpdate.RewriteAllUses(DT);
+ SSAUpdate.RewriteAllUses(&DTU->getDomTree());
}
/// Clones a basic block, and adds it to the CFG.
@@ -1335,28 +1376,13 @@ private:
return It != ClonedBBs.end() ? (*It).BB : nullptr;
}
- /// Helper to get the successor corresponding to a particular case value for
- /// a switch statement.
- BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
- BasicBlock *NextCase = nullptr;
- for (auto Case : Switch->cases()) {
- if (Case.getCaseValue()->getValue() == NextState) {
- NextCase = Case.getCaseSuccessor();
- break;
- }
- }
- if (!NextCase)
- NextCase = Switch->getDefaultDest();
- return NextCase;
- }
-
/// Returns true if IncomingBB is a predecessor of BB.
bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
return llvm::is_contained(predecessors(BB), IncomingBB);
}
AllSwitchPaths *SwitchPaths;
- DominatorTree *DT;
+ DomTreeUpdater *DTU;
AssumptionCache *AC;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
@@ -1399,7 +1425,7 @@ bool DFAJumpThreading::run(Function &F) {
<< "candidate for jump threading\n");
LLVM_DEBUG(SI->dump());
- unfoldSelectInstrs(DT, Switch.getSelectInsts());
+ unfoldSelectInstrs(Switch.getSelectInsts());
if (!Switch.getSelectInsts().empty())
MadeChanges = true;
@@ -1421,7 +1447,7 @@ bool DFAJumpThreading::run(Function &F) {
}
#ifdef NDEBUG
- LI->verify(*DT);
+ LI->verify(DTU->getDomTree());
#endif
SmallPtrSet<const Value *, 32> EphValues;
@@ -1429,13 +1455,15 @@ bool DFAJumpThreading::run(Function &F) {
CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
- TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
+ TransformDFA Transform(&SwitchPaths, DTU, AC, TTI, ORE, EphValues);
if (Transform.run())
MadeChanges = LoopInfoBroken = true;
}
+ DTU->flush();
+
#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Full));
+ assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full));
verifyFunction(F, &dbgs());
#endif
@@ -1450,7 +1478,9 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
OptimizationRemarkEmitter ORE(&F);
- DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ DFAJumpThreading ThreadImpl(&AC, &DTU, &LI, &TTI, &ORE);
if (!ThreadImpl.run(F))
return PreservedAnalyses::all();
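
The pass-level change: one lazy DomTreeUpdater now lives for the whole run and is threaded through select unfolding and path cloning, replacing the two short-lived updaters, with pending updates applied once via DTU->flush(). The overall shape, sketched under the assumption of the usual LLVM headers, with the transform body elided:

#include <cassert>
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Sketch of the shared lazy-updater pattern this diff adopts: CFG updates
// are queued on one DomTreeUpdater for the whole run and applied once.
static void runWithSharedDTU(DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  // ... unfold selects and clone paths, reporting edge changes to DTU ...
  DTU.flush(); // materialize pending updates before verification
  assert(DT.verify(DominatorTree::VerificationLevel::Full));
}
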
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 0f8cc6c..2afa7b7 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -108,7 +108,7 @@ struct SimpleValue {
// of instruction handled below (UnaryOperator, etc.).
if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
if (Function *F = CI->getCalledFunction()) {
- switch ((Intrinsic::ID)F->getIntrinsicID()) {
+ switch (F->getIntrinsicID()) {
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
@@ -154,9 +154,7 @@ struct SimpleValue {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<SimpleValue> {
+template <> struct llvm::DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
@@ -169,8 +167,6 @@ template <> struct DenseMapInfo<SimpleValue> {
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
-} // end namespace llvm
-
/// Match a 'select' including an optional 'not's of the condition.
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
Value *&B,
@@ -509,9 +505,7 @@ struct CallValue {
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<CallValue> {
+template <> struct llvm::DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
@@ -524,8 +518,6 @@ template <> struct DenseMapInfo<CallValue> {
static bool isEqual(CallValue LHS, CallValue RHS);
};
-} // end namespace llvm
-
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
@@ -580,9 +572,7 @@ struct GEPValue {
} // namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<GEPValue> {
+template <> struct llvm::DenseMapInfo<GEPValue> {
static inline GEPValue getEmptyKey() {
return DenseMapInfo<Instruction *>::getEmptyKey();
}
@@ -595,8 +585,6 @@ template <> struct DenseMapInfo<GEPValue> {
static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
};
-} // end namespace llvm
-
unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
auto *GEP = cast<GetElementPtrInst>(Val.Inst);
if (Val.ConstantOffset.has_value())
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 638952a..3a8ade8 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -170,9 +170,7 @@ struct llvm::GVNPass::Expression {
}
};
-namespace llvm {
-
-template <> struct DenseMapInfo<GVNPass::Expression> {
+template <> struct llvm::DenseMapInfo<GVNPass::Expression> {
static inline GVNPass::Expression getEmptyKey() { return ~0U; }
static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
@@ -188,8 +186,6 @@ template <> struct DenseMapInfo<GVNPass::Expression> {
}
};
-} // end namespace llvm
-
/// Represents a particular available value that we know how to materialize.
/// Materialization of an AvailableValue never fails. An AvailableValue is
/// implicitly associated with a rematerialization point which is the
@@ -2084,13 +2080,6 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) {
return Changed;
}
-static bool hasUsersIn(Value *V, BasicBlock *BB) {
- return any_of(V->users(), [BB](User *U) {
- auto *I = dyn_cast<Instruction>(U);
- return I && I->getParent() == BB;
- });
-}
-
bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
Value *V = IntrinsicI->getArgOperand(0);
@@ -2149,85 +2138,7 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
}
Constant *True = ConstantInt::getTrue(V->getContext());
- bool Changed = false;
-
- for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
- BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
-
- // This property is only true in dominated successors, propagateEquality
- // will check dominance for us.
- Changed |= propagateEquality(V, True, Edge, false);
- }
-
- // We can replace assume value with true, which covers cases like this:
- // call void @llvm.assume(i1 %cmp)
- // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
- ReplaceOperandsWithMap[V] = True;
-
- // Similarly, after assume(!NotV) we know that NotV == false.
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))))
- ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
-
- // If we find an equality fact, canonicalize all dominated uses in this block
- // to one of the two values. We heuristically choice the "oldest" of the
- // two where age is determined by value number. (Note that propagateEquality
- // above handles the cross block case.)
- //
- // Key case to cover are:
- // 1)
- // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
- // call void @llvm.assume(i1 %cmp)
- // ret float %0 ; will change it to ret float 3.000000e+00
- // 2)
- // %load = load float, float* %addr
- // %cmp = fcmp oeq float %load, %0
- // call void @llvm.assume(i1 %cmp)
- // ret float %load ; will change it to ret float %0
- if (auto *CmpI = dyn_cast<CmpInst>(V)) {
- if (CmpI->isEquivalence()) {
- Value *CmpLHS = CmpI->getOperand(0);
- Value *CmpRHS = CmpI->getOperand(1);
- // Heuristically pick the better replacement -- the choice of heuristic
- // isn't terribly important here, but the fact we canonicalize on some
- // replacement is for exposing other simplifications.
- // TODO: pull this out as a helper function and reuse w/ existing
- // (slightly different) logic.
- if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
- std::swap(CmpLHS, CmpRHS);
- if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
- std::swap(CmpLHS, CmpRHS);
- if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
- (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
- // Move the 'oldest' value to the right-hand side, using the value
- // number as a proxy for age.
- uint32_t LVN = VN.lookupOrAdd(CmpLHS);
- uint32_t RVN = VN.lookupOrAdd(CmpRHS);
- if (LVN < RVN)
- std::swap(CmpLHS, CmpRHS);
- }
-
- // Handle degenerate case where we either haven't pruned a dead path or a
- // removed a trivial assume yet.
- if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
- return Changed;
-
- LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
- << *CmpLHS << " with "
- << *CmpRHS << " in block "
- << IntrinsicI->getParent()->getName() << "\n");
-
- // Setup the replacement map - this handles uses within the same block.
- if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
- ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
-
- // NOTE: The non-block local cases are handled by the call to
- // propagateEquality above; this block is just about handling the block
- // local cases. TODO: There's a bunch of logic in propagateEqualiy which
- // isn't duplicated for the block local case, can we share it somehow?
- }
- }
- return Changed;
+ return propagateEquality(V, True, IntrinsicI);
}
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
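
The propagateEquality(V, True, IntrinsicI) call above relies on the new overload introduced in the hunks below, which takes the dominance root as std::variant<BasicBlockEdge, Instruction *>: branch and switch call sites pass an edge, while processAssumeIntrinsic passes the assume itself and the callee walks the DomTree children of its block. The dispatch idiom, reduced to a self-contained sketch with placeholder types:

#include <iostream>
#include <string>
#include <variant>

// Placeholder types; LLVM's BasicBlockEdge/Instruction carry real CFG data.
struct Edge { std::string Start, End; };
struct Inst { std::string Parent; };

// Mirrors the std::get_if dispatch on the root: an edge scopes propagation
// to blocks reached only via that edge, an instruction to blocks dominated
// by its parent block.
static std::string scopeOf(const std::variant<Edge, Inst *> &Root) {
  if (const Edge *E = std::get_if<Edge>(&Root))
    return "only reachable via " + E->Start + " -> " + E->End;
  return "dominated by " + std::get<Inst *>(Root)->Parent;
}

int main() {
  Inst Assume{"entry"};
  std::cout << scopeOf(Edge{"if.then", "if.end"}) << '\n'
            << scopeOf(&Assume) << '\n';
}
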
@@ -2526,39 +2437,28 @@ void GVNPass::assignBlockRPONumber(Function &F) {
InvalidBlockRPONumbers = false;
}
-bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
- bool Changed = false;
- for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
- Use &Operand = Instr->getOperandUse(OpNum);
- auto It = ReplaceOperandsWithMap.find(Operand.get());
- if (It != ReplaceOperandsWithMap.end()) {
- const DataLayout &DL = Instr->getDataLayout();
- if (!canReplacePointersInUseIfEqual(Operand, It->second, DL))
- continue;
-
- LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
- << *It->second << " in instruction " << *Instr << '\n');
- Instr->setOperand(OpNum, It->second);
- Changed = true;
- }
- }
- return Changed;
-}
-
-/// The given values are known to be equal in every block
+/// The given values are known to be equal in every use
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-/// If DominatesByEdge is false, then it means that we will propagate the RHS
-/// value starting from the end of Root.Start.
-bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
- const BasicBlockEdge &Root,
- bool DominatesByEdge) {
+/// The Root may either be a basic block edge (for conditions) or an
+/// instruction (for assumes).
+bool GVNPass::propagateEquality(
+ Value *LHS, Value *RHS,
+ const std::variant<BasicBlockEdge, Instruction *> &Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
- // For speed, compute a conservative fast approximation to
- // DT->dominates(Root, Root.getEnd());
- const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
+ SmallVector<const BasicBlock *> DominatedBlocks;
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
+ // For speed, compute a conservative fast approximation to
+ // DT->dominates(*Edge, Edge->getEnd());
+ if (isOnlyReachableViaThisEdge(*Edge, DT))
+ DominatedBlocks.push_back(Edge->getEnd());
+ } else {
+ Instruction *I = std::get<Instruction *>(Root);
+ for (const auto *Node : DT->getNode(I->getParent())->children())
+ DominatedBlocks.push_back(Node->getBlock());
+ }
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2606,9 +2506,9 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
// using the leader table is about compiling faster, not optimizing better).
// The leader table only tracks basic blocks, not edges. Only add to it if we
// have the simple case where the edge dominates the end.
- if (RootDominatesEnd && !isa<Instruction>(RHS) &&
- canReplacePointersIfEqual(LHS, RHS, DL))
- LeaderTable.insert(LVN, RHS, Root.getEnd());
+ if (!isa<Instruction>(RHS) && canReplacePointersIfEqual(LHS, RHS, DL))
+ for (const BasicBlock *BB : DominatedBlocks)
+ LeaderTable.insert(LVN, RHS, BB);
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@@ -2618,12 +2518,14 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
return canReplacePointersInUseIfEqual(U, To, DL);
};
- unsigned NumReplacements =
- DominatesByEdge
- ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
- CanReplacePointersCallBack)
- : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
- CanReplacePointersCallBack);
+ unsigned NumReplacements;
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
+ NumReplacements = replaceDominatedUsesWithIf(
+ LHS, RHS, *DT, *Edge, CanReplacePointersCallBack);
+ else
+ NumReplacements = replaceDominatedUsesWithIf(
+ LHS, RHS, *DT, std::get<Instruction *>(Root),
+ CanReplacePointersCallBack);
if (NumReplacements > 0) {
Changed = true;
@@ -2682,26 +2584,45 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
- Value *NotCmp = findLeader(Root.getEnd(), Num);
- if (NotCmp && isa<Instruction>(NotCmp)) {
- unsigned NumReplacements =
- DominatesByEdge
- ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
- : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
- Root.getStart());
- Changed |= NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
- // Cached information for anything that uses NotCmp will be invalid.
- if (MD)
- MD->invalidateCachedPointerInfo(NotCmp);
+ for (const auto &Entry : LeaderTable.getLeaders(Num)) {
+ // Only look at leaders that either dominate the start of the edge,
+ // or are dominated by the end. This check is not necessary for
+ // correctness; it only discards cases for which the following
+ // use replacement will not work anyway.
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root)) {
+ if (!DT->dominates(Entry.BB, Edge->getStart()) &&
+ !DT->dominates(Edge->getEnd(), Entry.BB))
+ continue;
+ } else {
+ auto *InstBB = std::get<Instruction *>(Root)->getParent();
+ if (!DT->dominates(Entry.BB, InstBB) &&
+ !DT->dominates(InstBB, Entry.BB))
+ continue;
+ }
+
+ Value *NotCmp = Entry.Val;
+ if (NotCmp && isa<Instruction>(NotCmp)) {
+ unsigned NumReplacements;
+ if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(&Root))
+ NumReplacements =
+ replaceDominatedUsesWith(NotCmp, NotVal, *DT, *Edge);
+ else
+ NumReplacements = replaceDominatedUsesWith(
+ NotCmp, NotVal, *DT, std::get<Instruction *>(Root));
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses NotCmp will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(NotCmp);
+ }
}
}
// Ensure that any instruction in scope that gets the "A < B" value number
// is replaced with false.
 // The leader table only tracks basic blocks, not edges. Only add entries
 // for the blocks known to be dominated by Root.
- if (RootDominatesEnd)
- LeaderTable.insert(Num, NotVal, Root.getEnd());
+ for (const BasicBlock *BB : DominatedBlocks)
+ LeaderTable.insert(Num, NotVal, BB);
continue;
}
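
To make the leader filter above concrete (hypothetical CFG, not from the patch): suppose the condition produces the edge A -> B, with a sibling successor C:

    A ---> B ---> D
    |
    +----> C

A leader recorded in A (or any block dominating A) is visible along the edge, and a leader recorded in B or D is only reachable through it, so both are worth handing to replaceDominatedUsesWith. A leader recorded in C satisfies neither dominance test, and no use of it could ever be rewritten from this root, so it is skipped up front.
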
@@ -2789,11 +2710,11 @@ bool GVNPass::processInstruction(Instruction *I) {
Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
BasicBlockEdge TrueE(Parent, TrueSucc);
- Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
BasicBlockEdge FalseE(Parent, FalseSucc);
- Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
return Changed;
}
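
A worked source-level example (hypothetical function, for illustration only) of what propagating the branch condition into a successor achieves:

    // Uses of X dominated by the true edge of the comparison may be
    // rewritten to the constant.
    int f(int X) {
      if (X == 7)
        return X * 2; // GVN can fold this path to 'return 14;'
      return X;       // not dominated by the true edge; unchanged
    }
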
@@ -2814,7 +2735,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E, true);
+ Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E);
}
}
return Changed;
@@ -2942,8 +2863,6 @@ bool GVNPass::processBlock(BasicBlock *BB) {
if (DeadBlocks.count(BB))
return false;
- // Clearing map before every BB because it can be used only for single BB.
- ReplaceOperandsWithMap.clear();
bool ChangedFunction = false;
// Since we may not have visited the input blocks of the phis, we can't
@@ -2955,11 +2874,8 @@ bool GVNPass::processBlock(BasicBlock *BB) {
for (PHINode *PN : PHINodesToRemove) {
removeInstruction(PN);
}
- for (Instruction &Inst : make_early_inc_range(*BB)) {
- if (!ReplaceOperandsWithMap.empty())
- ChangedFunction |= replaceOperandsForInBlockEquality(&Inst);
+ for (Instruction &Inst : make_early_inc_range(*BB))
ChangedFunction |= processInstruction(&Inst);
- }
return ChangedFunction;
}
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 3c1a8ba..80aa98d 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -434,10 +434,6 @@ private:
int StoreCount = 0;
};
-} // end anonymous namespace
-
-namespace llvm {
-
struct ExactEqualsExpression {
const Expression &E;
@@ -449,8 +445,9 @@ struct ExactEqualsExpression {
return E.exactlyEquals(Other);
}
};
+} // end anonymous namespace
-template <> struct DenseMapInfo<const Expression *> {
+template <> struct llvm::DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
@@ -493,8 +490,6 @@ template <> struct DenseMapInfo<const Expression *> {
}
};
-} // end namespace llvm
-
namespace {
class NewGVN {
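
The specialization move above relies on a C++17 rule worth spelling out: an explicit specialization may be defined outside its namespace using a qualified name, so the surrounding namespace llvm block becomes unnecessary. A minimal sketch with illustrative names:

    #include <cstddef>

    namespace ns {
    template <typename T> struct Info; // primary template declared in ns
    } // namespace ns

    struct Key {};

    // C++17: the specialization is defined with a qualified name, with no
    // enclosing 'namespace ns { ... }' block.
    template <> struct ns::Info<Key> {
      static std::size_t hash(const Key &) { return 0; }
    };

    int main() { return static_cast<int>(ns::Info<Key>::hash(Key{})); }
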
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 2190dcd..a87822c 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -84,10 +84,6 @@ public:
bool run();
};
-} // anonymous namespace
-
-namespace llvm {
-
struct FrozenIndPHIInfo {
// A freeze instruction that uses an induction phi
FreezeInst *FI = nullptr;
@@ -103,7 +99,9 @@ struct FrozenIndPHIInfo {
bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; }
};
-template <> struct DenseMapInfo<FrozenIndPHIInfo> {
+} // namespace
+
+template <> struct llvm::DenseMapInfo<FrozenIndPHIInfo> {
static inline FrozenIndPHIInfo getEmptyKey() {
return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(),
DenseMapInfo<BinaryOperator *>::getEmptyKey());
@@ -124,8 +122,6 @@ template <> struct DenseMapInfo<FrozenIndPHIInfo> {
};
};
-} // end namespace llvm
-
// Given U = (value, user), replace value with freeze(value), and let
// SCEV forget user. The inserted freeze is placed in the preheader.
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) {
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b6ca52e..46f2903 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3246,6 +3246,13 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
return ::replaceDominatedUsesWith(From, To, Dominates);
}
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const Instruction *I) {
+ auto Dominates = [&](const Use &U) { return DT.dominates(I, U); };
+ return ::replaceDominatedUsesWith(From, To, Dominates);
+}
+
unsigned llvm::replaceDominatedUsesWithIf(
Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root,
function_ref<bool(const Use &U, const Value *To)> ShouldReplace) {
@@ -3264,6 +3271,15 @@ unsigned llvm::replaceDominatedUsesWithIf(
return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace);
}
+unsigned llvm::replaceDominatedUsesWithIf(
+ Value *From, Value *To, DominatorTree &DT, const Instruction *I,
+ function_ref<bool(const Use &U, const Value *To)> ShouldReplace) {
+ auto DominatesAndShouldReplace = [&](const Use &U) {
+ return DT.dominates(I, U) && ShouldReplace(U, To);
+ };
+ return ::replaceDominatedUsesWith(From, To, DominatesAndShouldReplace);
+}
+
bool llvm::callsGCLeafFunction(const CallBase *Call,
const TargetLibraryInfo &TLI) {
// Check if the function is specifically marked as a gc leaf function.
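
A hedged sketch of how a caller might use the new instruction-rooted overload added above; rewriteDominatedUses is a hypothetical helper, not part of this patch, and the call mirrors how GVN invokes the overload:

    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    // Rewrite every use of V that the given instruction dominates;
    // DT.dominates(I, U) is true when instruction I dominates use U.
    static unsigned rewriteDominatedUses(Value *V, Value *C,
                                         DominatorTree &DT,
                                         Instruction *Root) {
      return replaceDominatedUsesWithIf(
          V, C, DT, Root,
          [](const Use &U, const Value *To) {
            return true; // accept every dominated use in this sketch
          });
    }
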
diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index ff2ab3c..cecb662 100644
--- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -27,15 +27,15 @@ using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
namespace {
- class LowerInvokeLegacyPass : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
- initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- };
-}
+class LowerInvokeLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
+ initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
char LowerInvokeLegacyPass::ID = 0;
INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke",
@@ -78,11 +78,12 @@ bool LowerInvokeLegacyPass::runOnFunction(Function &F) {
return runImpl(F);
}
-namespace llvm {
-char &LowerInvokePassID = LowerInvokeLegacyPass::ID;
+char &llvm::LowerInvokePassID = LowerInvokeLegacyPass::ID;
// Public interface to the LowerInvoke pass.
-FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); }
+FunctionPass *llvm::createLowerInvokePass() {
+ return new LowerInvokeLegacyPass();
+}
PreservedAnalyses LowerInvokePass::run(Function &F,
FunctionAnalysisManager &AM) {
@@ -92,4 +93,3 @@ PreservedAnalyses LowerInvokePass::run(Function &F,
return PreservedAnalyses::none();
}
-}
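
The same cleanup pattern recurs throughout this patch: namespace members are defined with qualified names rather than by reopening namespace llvm. A minimal sketch with illustrative names:

    namespace demo {
    extern int PassID;
    int *getPassID();
    } // namespace demo

    // Qualified definitions replace a reopened 'namespace demo { ... }' block.
    int demo::PassID = 0;
    int *demo::getPassID() { return &PassID; }

    int main() { return *demo::getPassID(); }
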
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp
index ca7e09d..1585e9e 100644
--- a/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -48,8 +48,6 @@
using namespace llvm;
using namespace misexpect;
-namespace llvm {
-
// Command line option to enable/disable the warning when profile data suggests
// a mismatch with the use of the llvm.expect intrinsic
static cl::opt<bool> PGOWarnMisExpect(
@@ -63,22 +61,18 @@ static cl::opt<uint32_t> MisExpectTolerance(
cl::desc("Prevents emitting diagnostics when profile counts are "
"within N% of the threshold.."));
-} // namespace llvm
-
-namespace {
-
-bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
+static bool isMisExpectDiagEnabled(const LLVMContext &Ctx) {
return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
}
-uint32_t getMisExpectTolerance(LLVMContext &Ctx) {
+static uint32_t getMisExpectTolerance(const LLVMContext &Ctx) {
return std::max(static_cast<uint32_t>(MisExpectTolerance),
Ctx.getDiagnosticsMisExpectTolerance());
}
-Instruction *getInstCondition(Instruction *I) {
+static const Instruction *getInstCondition(const Instruction *I) {
assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
- Instruction *Ret = nullptr;
+ const Instruction *Ret = nullptr;
if (auto *B = dyn_cast<BranchInst>(I)) {
Ret = dyn_cast<Instruction>(B->getCondition());
}
@@ -97,8 +91,8 @@ Instruction *getInstCondition(Instruction *I) {
return Ret ? Ret : I;
}
-void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
- uint64_t ProfCount, uint64_t TotalCount) {
+static void emitMisexpectDiagnostic(const Instruction *I, LLVMContext &Ctx,
+ uint64_t ProfCount, uint64_t TotalCount) {
double PercentageCorrect = (double)ProfCount / TotalCount;
auto PerString =
formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
@@ -106,20 +100,16 @@ void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
"Potential performance regression from use of the llvm.expect intrinsic: "
"Annotation was correct on {0} of profiled executions.",
PerString);
- Instruction *Cond = getInstCondition(I);
+ const Instruction *Cond = getInstCondition(I);
if (isMisExpectDiagEnabled(Ctx))
Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Twine(PerString)));
OptimizationRemarkEmitter ORE(I->getParent()->getParent());
ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
}
-} // namespace
-
-namespace llvm {
-namespace misexpect {
-
-void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
- ArrayRef<uint32_t> ExpectedWeights) {
+void misexpect::verifyMisExpect(const Instruction &I,
+ ArrayRef<uint32_t> RealWeights,
+ ArrayRef<uint32_t> ExpectedWeights) {
// To determine if we emit a diagnostic, we need to compare the branch weights
// from the profile to those added by the llvm.expect intrinsic.
// So first, we extract the "likely" and "unlikely" weights from
@@ -128,15 +118,13 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
uint64_t LikelyBranchWeight = 0,
UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
size_t MaxIndex = 0;
- for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
- uint32_t V = ExpectedWeights[Idx];
+ for (const auto &[Idx, V] : enumerate(ExpectedWeights)) {
if (LikelyBranchWeight < V) {
LikelyBranchWeight = V;
MaxIndex = Idx;
}
- if (UnlikelyBranchWeight > V) {
+ if (UnlikelyBranchWeight > V)
UnlikelyBranchWeight = V;
- }
}
const uint64_t ProfiledWeight = RealWeights[MaxIndex];
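
A std-only sketch (illustrative weights, mirroring the enumerate loop above) of how the likely/unlikely weights fall out:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // llvm.expect-style weights: index 0 is the annotated "likely" arm.
      std::vector<uint32_t> ExpectedWeights = {2000, 1, 1};
      uint64_t Likely = 0, Unlikely = UINT32_MAX;
      std::size_t MaxIndex = 0;
      for (std::size_t Idx = 0; Idx < ExpectedWeights.size(); ++Idx) {
        if (Likely < ExpectedWeights[Idx]) {
          Likely = ExpectedWeights[Idx];
          MaxIndex = Idx;
        }
        Unlikely = std::min<uint64_t>(Unlikely, ExpectedWeights[Idx]);
      }
      // The profiled count compared against the threshold is then
      // RealWeights[MaxIndex], the arm the annotation claimed was likely.
      assert(Likely == 2000 && MaxIndex == 0 && Unlikely == 1);
      return 0;
    }
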
@@ -161,7 +149,7 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);
// clamp tolerance range to [0, 100)
- auto Tolerance = getMisExpectTolerance(I.getContext());
+ uint32_t Tolerance = getMisExpectTolerance(I.getContext());
Tolerance = std::clamp(Tolerance, 0u, 99u);
 // Allow users to relax checking by N%, i.e., if they use a 5% tolerance,
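
For concreteness, a worked example under the assumption (taken from the surrounding file, not this hunk) that the code following this comment scales the threshold by (1 - Tolerance/100): with RealWeightsTotal = 1000 and a likely probability of 95%, ScaledThreshold is 950; a 5% tolerance lowers the bar to 950 * 0.95 = 902.5, so a profiled count of 910 on the likely arm no longer triggers the diagnostic.
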
@@ -175,8 +163,8 @@ void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
RealWeightsTotal);
}
-void checkBackendInstrumentation(Instruction &I,
- const ArrayRef<uint32_t> RealWeights) {
+void misexpect::checkBackendInstrumentation(const Instruction &I,
+ ArrayRef<uint32_t> RealWeights) {
// Backend checking assumes any existing weight comes from an `llvm.expect`
// intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple
// times, leading to an invalid assumption in our checking. Backend checks
@@ -190,24 +178,19 @@ void checkBackendInstrumentation(Instruction &I,
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
-void checkFrontendInstrumentation(Instruction &I,
- const ArrayRef<uint32_t> ExpectedWeights) {
+void misexpect::checkFrontendInstrumentation(
+ const Instruction &I, ArrayRef<uint32_t> ExpectedWeights) {
SmallVector<uint32_t> RealWeights;
if (!extractBranchWeights(I, RealWeights))
return;
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
-void checkExpectAnnotations(Instruction &I,
- const ArrayRef<uint32_t> ExistingWeights,
- bool IsFrontend) {
- if (IsFrontend) {
+void misexpect::checkExpectAnnotations(const Instruction &I,
+ ArrayRef<uint32_t> ExistingWeights,
+ bool IsFrontend) {
+ if (IsFrontend)
checkFrontendInstrumentation(I, ExistingWeights);
- } else {
+ else
checkBackendInstrumentation(I, ExistingWeights);
- }
}
-
-} // namespace misexpect
-} // namespace llvm
-#undef DEBUG_TYPE
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 155fcc5..d831c27 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5959,7 +5959,11 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
unsigned PreviousEdges = OtherCases->size();
if (OtherDest == SI->getDefaultDest())
++PreviousEdges;
- for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ unsigned E = PreviousEdges - 1;
+ // If the new branch is unconditional, no edge from the switch block to
+ // OtherDest remains, so remove every incoming value rather than all but one.
+ if (NewBI->isUnconditional())
+ ++E;
+ for (unsigned I = 0; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
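
To illustrate the bookkeeping (hypothetical counts): with three case values plus the default all targeting OtherDest, PreviousEdges is 4. A conditional replacement branch leaves one edge from the switch block, so three incoming values are removed from each PHI; an unconditional one leaves no edge, and the extra ++E makes the loop remove all four.
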
@@ -7736,8 +7740,7 @@ struct SwitchSuccWrapper {
DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
};
-namespace llvm {
-template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
+template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> {
static const SwitchSuccWrapper *getEmptyKey() {
return static_cast<SwitchSuccWrapper *>(
DenseMapInfo<void *>::getEmptyKey());
@@ -7805,7 +7808,6 @@ template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
return true;
}
};
-} // namespace llvm
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
DomTreeUpdater *DTU) {