aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp49
-rw-r--r--llvm/lib/Analysis/StaticDataProfileInfo.cpp40
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AIXException.cpp4
-rw-r--r--llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp4
-rw-r--r--llvm/lib/CodeGen/BasicBlockPathCloning.cpp4
-rw-r--r--llvm/lib/CodeGen/BreakFalseDeps.cpp4
-rw-r--r--llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp4
-rw-r--r--llvm/lib/CodeGen/EdgeBundles.cpp11
-rw-r--r--llvm/lib/CodeGen/ExpandFp.cpp134
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp38
-rw-r--r--llvm/lib/CodeGen/GlobalMergeFunctions.cpp10
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp6
-rw-r--r--llvm/lib/CodeGen/MIR2Vec.cpp13
-rw-r--r--llvm/lib/CodeGen/MIRFSDiscriminator.cpp2
-rw-r--r--llvm/lib/CodeGen/MIRNamerPass.cpp17
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp6
-rw-r--r--llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp9
-rw-r--r--llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp18
-rw-r--r--llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp9
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp62
-rw-r--r--llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp8
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp5
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp13
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp104
-rw-r--r--llvm/lib/CodeGen/MachineTraceMetrics.cpp7
-rw-r--r--llvm/lib/CodeGen/NonRelocatableStringpool.cpp4
-rw-r--r--llvm/lib/CodeGen/SafeStack.cpp4
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp6
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGPrinter.cpp80
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp17
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp33
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp1
-rw-r--r--llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp5
-rw-r--r--llvm/lib/CodeGen/StaticDataAnnotator.cpp15
-rw-r--r--llvm/lib/CodeGen/StaticDataSplitter.cpp6
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp3
-rw-r--r--llvm/lib/CodeGen/TargetRegisterInfo.cpp17
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp6
-rw-r--r--llvm/lib/IR/ConstantFPRange.cpp166
-rw-r--r--llvm/lib/IR/Constants.cpp7
-rw-r--r--llvm/lib/IR/Core.cpp11
-rw-r--r--llvm/lib/IR/IRBuilder.cpp13
-rw-r--r--llvm/lib/IR/Instructions.cpp16
-rw-r--r--llvm/lib/IR/Verifier.cpp2
-rw-r--r--llvm/lib/Support/DebugCounter.cpp56
-rw-r--r--llvm/lib/Support/VirtualOutputBackends.cpp2
-rw-r--r--llvm/lib/TableGen/Record.cpp9
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp70
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.cpp132
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.h31
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp31
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td28
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td17
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp8
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp8
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td16
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td34
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp34
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.h34
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h66
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp205
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td16
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td22
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td9
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td143
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrPredicates.td36
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp21
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td68
-rw-r--r--llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp16
-rw-r--r--llvm/lib/TargetParser/RISCVTargetParser.cpp4
-rw-r--r--llvm/lib/TargetParser/TargetDataLayout.cpp4
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroCloner.h9
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroEarly.cpp2
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInternal.h9
-rw-r--r--llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp5
-rw-r--r--llvm/lib/Transforms/Coroutines/SpillUtils.cpp37
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp4
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp5
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfUse.cpp55
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp43
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp67
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp105
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp21
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h1
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp12
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h1
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h8
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp4
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp35
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp3
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp8
-rw-r--r--llvm/lib/XRay/BlockIndexer.cpp7
-rw-r--r--llvm/lib/XRay/BlockPrinter.cpp7
-rw-r--r--llvm/lib/XRay/BlockVerifier.cpp18
-rw-r--r--llvm/lib/XRay/FDRRecordProducer.cpp14
-rw-r--r--llvm/lib/XRay/FDRRecords.cpp7
-rw-r--r--llvm/lib/XRay/FDRTraceExpander.cpp7
-rw-r--r--llvm/lib/XRay/FDRTraceWriter.cpp12
-rw-r--r--llvm/lib/XRay/FileHeaderReader.cpp12
-rw-r--r--llvm/lib/XRay/LogBuilderConsumer.cpp7
-rw-r--r--llvm/lib/XRay/Profile.cpp18
-rw-r--r--llvm/lib/XRay/RecordInitializer.cpp7
-rw-r--r--llvm/lib/XRay/RecordPrinter.cpp7
-rw-r--r--llvm/lib/XRay/Trace.cpp18
121 files changed, 1927 insertions, 839 deletions
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b5b4cd9..00c3dbb 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5419,20 +5419,15 @@ static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
if (SourceBits != NewBits)
return nullptr;
- const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
- const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
- if (!SExt && !ZExt)
- return nullptr;
- const SCEVTruncateExpr *Trunc =
- SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
- : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
- if (!Trunc)
- return nullptr;
- const SCEV *X = Trunc->getOperand();
- if (X != SymbolicPHI)
- return nullptr;
- Signed = SExt != nullptr;
- return Trunc->getType();
+ if (match(Op, m_scev_SExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) {
+ Signed = true;
+ return cast<SCEVCastExpr>(Op)->getOperand()->getType();
+ }
+ if (match(Op, m_scev_ZExt(m_scev_Trunc(m_scev_Specific(SymbolicPHI))))) {
+ Signed = false;
+ return cast<SCEVCastExpr>(Op)->getOperand()->getType();
+ }
+ return nullptr;
}
static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
@@ -15428,20 +15423,18 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
// Try to match 'zext (trunc A to iB) to iY', which is used
// for URem with constant power-of-2 second operands. Make sure the size of
// the operand A matches the size of the whole expressions.
- if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
- if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
- LHS = Trunc->getOperand();
- // Bail out if the type of the LHS is larger than the type of the
- // expression for now.
- if (getTypeSizeInBits(LHS->getType()) >
- getTypeSizeInBits(Expr->getType()))
- return false;
- if (LHS->getType() != Expr->getType())
- LHS = getZeroExtendExpr(LHS, Expr->getType());
- RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
- << getTypeSizeInBits(Trunc->getType()));
- return true;
- }
+ if (match(Expr, m_scev_ZExt(m_scev_Trunc(m_SCEV(LHS))))) {
+ Type *TruncTy = cast<SCEVZeroExtendExpr>(Expr)->getOperand()->getType();
+ // Bail out if the type of the LHS is larger than the type of the
+ // expression for now.
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(Expr->getType()))
+ return false;
+ if (LHS->getType() != Expr->getType())
+ LHS = getZeroExtendExpr(LHS, Expr->getType());
+ RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
+ << getTypeSizeInBits(TruncTy));
+ return true;
+ }
const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
if (Add == nullptr || Add->getNumOperands() != 2)
return false;
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index b036b2d..1f751ee 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -6,6 +6,46 @@
#include "llvm/ProfileData/InstrProf.h"
using namespace llvm;
+
+namespace llvm {
+namespace memprof {
+// Returns true iff the global variable has custom section either by
+// __attribute__((section("name")))
+// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
+// or #pragma clang section directives
+// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
+static bool hasExplicitSectionName(const GlobalVariable &GVar) {
+ if (GVar.hasSection())
+ return true;
+
+ auto Attrs = GVar.getAttributes();
+ if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
+ Attrs.hasAttribute("relro-section") ||
+ Attrs.hasAttribute("rodata-section"))
+ return true;
+ return false;
+}
+
+AnnotationKind getAnnotationKind(const GlobalVariable &GV) {
+ if (GV.isDeclarationForLinker())
+ return AnnotationKind::DeclForLinker;
+ // Skip 'llvm.'-prefixed global variables conservatively because they are
+ // often handled specially,
+ StringRef Name = GV.getName();
+ if (Name.starts_with("llvm."))
+ return AnnotationKind::ReservedName;
+ // Respect user-specified custom data sections.
+ if (hasExplicitSectionName(GV))
+ return AnnotationKind::ExplicitSection;
+ return AnnotationKind::AnnotationOK;
+}
+
+bool IsAnnotationOK(const GlobalVariable &GV) {
+ return getAnnotationKind(GV) == AnnotationKind::AnnotationOK;
+}
+} // namespace memprof
+} // namespace llvm
+
void StaticDataProfileInfo::addConstantProfileCount(
const Constant *C, std::optional<uint64_t> Count) {
if (!Count) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 6356d71..873ac8f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -20,7 +20,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-namespace llvm {
+using namespace llvm;
AIXException::AIXException(AsmPrinter *A) : EHStreamer(A) {}
@@ -90,5 +90,3 @@ void AIXException::endFunction(const MachineFunction *MF) {
emitExceptionInfoTable(LSDALabel, PerSym);
}
-
-} // End of namespace llvm
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 260ce8f..93ae548 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -85,8 +85,7 @@ template <> struct llvm::DenseMapInfo<VariableID> {
using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>;
-namespace std {
-template <> struct hash<VarLocInsertPt> {
+template <> struct std::hash<VarLocInsertPt> {
using argument_type = VarLocInsertPt;
using result_type = std::size_t;
@@ -94,7 +93,6 @@ template <> struct hash<VarLocInsertPt> {
return std::hash<void *>()(Arg.getOpaqueValue());
}
};
-} // namespace std
/// Helper class to build FunctionVarLocs, since that class isn't easy to
/// modify. TODO: There's not a great deal of value in the split, it could be
diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
index fd7df6b..47b7a88 100644
--- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
+++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
@@ -207,9 +207,7 @@ bool ApplyCloning(MachineFunction &MF,
}
return AnyPathsCloned;
}
-} // end anonymous namespace
-namespace llvm {
class BasicBlockPathCloning : public MachineFunctionPass {
public:
static char ID;
@@ -229,7 +227,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-} // namespace llvm
+} // namespace
char BasicBlockPathCloning::ID = 0;
INITIALIZE_PASS_BEGIN(
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 28e6728..1846880 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -31,7 +31,7 @@
using namespace llvm;
-namespace llvm {
+namespace {
class BreakFalseDeps : public MachineFunctionPass {
private:
@@ -95,7 +95,7 @@ private:
void processUndefReads(MachineBasicBlock *);
};
-} // namespace llvm
+} // namespace
#define DEBUG_TYPE "break-false-deps"
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 6c2a5a7..87ada87 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -126,8 +126,7 @@ hash_code hash_value(const ComplexValue &Arg) {
} // end namespace
typedef SmallVector<struct ComplexValue, 2> ComplexValues;
-namespace llvm {
-template <> struct DenseMapInfo<ComplexValue> {
+template <> struct llvm::DenseMapInfo<ComplexValue> {
static inline ComplexValue getEmptyKey() {
return {DenseMapInfo<Value *>::getEmptyKey(),
DenseMapInfo<Value *>::getEmptyKey()};
@@ -144,7 +143,6 @@ template <> struct DenseMapInfo<ComplexValue> {
return LHS.Real == RHS.Real && LHS.Imag == RHS.Imag;
}
};
-} // end namespace llvm
namespace {
template <typename T, typename IterT>
diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp
index f4335396..50dd66f 100644
--- a/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -81,13 +81,10 @@ void EdgeBundles::init() {
}
}
-namespace llvm {
-
/// Specialize WriteGraph, the standard implementation won't work.
-template<>
-raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
- bool ShortNames,
- const Twine &Title) {
+template <>
+raw_ostream &llvm::WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames, const Twine &Title) {
const MachineFunction *MF = G.getMachineFunction();
O << "digraph {\n";
@@ -107,8 +104,6 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
return O;
}
-} // end namespace llvm
-
/// view - Visualize the annotated bipartite CFG with Graphviz.
void EdgeBundles::view() const {
ViewGraph(*this, "EdgeBundles");
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 9cc6c6a..04c7008 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -82,7 +82,7 @@ public:
}
static FRemExpander create(IRBuilder<> &B, Type *Ty) {
- assert(canExpandType(Ty));
+ assert(canExpandType(Ty) && "Expected supported floating point type");
// The type to use for the computation of the remainder. This may be
// wider than the input/result type which affects the ...
@@ -356,8 +356,9 @@ Value *FRemExpander::buildFRem(Value *X, Value *Y,
static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
- Type *ReturnTy = I.getType();
- assert(FRemExpander::canExpandType(ReturnTy->getScalarType()));
+ Type *Ty = I.getType();
+ assert(FRemExpander::canExpandType(Ty) &&
+ "Expected supported floating point type");
FastMathFlags FMF = I.getFastMathFlags();
// TODO Make use of those flags for optimization?
@@ -368,32 +369,10 @@ static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
B.setFastMathFlags(FMF);
B.SetCurrentDebugLocation(I.getDebugLoc());
- Type *ElemTy = ReturnTy->getScalarType();
- const FRemExpander Expander = FRemExpander::create(B, ElemTy);
-
- Value *Ret;
- if (ReturnTy->isFloatingPointTy())
- Ret = FMF.approxFunc()
- ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
- : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
- else {
- auto *VecTy = cast<FixedVectorType>(ReturnTy);
-
- // This could use SplitBlockAndInsertForEachLane but the interface
- // is a bit awkward for a constant number of elements and it will
- // boil down to the same code.
- // TODO Expand the FRem instruction only once and reuse the code.
- Value *Nums = I.getOperand(0);
- Value *Denums = I.getOperand(1);
- Ret = PoisonValue::get(I.getType());
- for (int I = 0, E = VecTy->getNumElements(); I != E; ++I) {
- Value *Num = B.CreateExtractElement(Nums, I);
- Value *Denum = B.CreateExtractElement(Denums, I);
- Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denum)
- : Expander.buildFRem(Num, Denum, SQ);
- Ret = B.CreateInsertElement(Ret, Rem, I);
- }
- }
+ const FRemExpander Expander = FRemExpander::create(B, Ty);
+ Value *Ret = FMF.approxFunc()
+ ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
+ : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
I.replaceAllUsesWith(Ret);
Ret->takeName(&I);
@@ -939,7 +918,8 @@ static void expandIToFP(Instruction *IToFP) {
IToFP->eraseFromParent();
}
-static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
+static void scalarize(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist) {
VectorType *VTy = cast<FixedVectorType>(I->getType());
IRBuilder<> Builder(I);
@@ -948,12 +928,25 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
Value *Result = PoisonValue::get(VTy);
for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
- Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
- I->getType()->getScalarType());
- Result = Builder.CreateInsertElement(Result, Cast, Idx);
- if (isa<Instruction>(Cast))
- Replace.push_back(cast<Instruction>(Cast));
+
+ Value *NewOp = nullptr;
+ if (auto *BinOp = dyn_cast<BinaryOperator>(I))
+ NewOp = Builder.CreateBinOp(
+ BinOp->getOpcode(), Ext,
+ Builder.CreateExtractElement(I->getOperand(1), Idx));
+ else if (auto *CastI = dyn_cast<CastInst>(I))
+ NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
+ I->getType()->getScalarType());
+ else
+ llvm_unreachable("Unsupported instruction type");
+
+ Result = Builder.CreateInsertElement(Result, NewOp, Idx);
+ if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
+ ScalarizedI->copyIRFlags(I, true);
+ Worklist.push_back(ScalarizedI);
+ }
}
+
I->replaceAllUsesWith(Result);
I->dropAllReferences();
I->eraseFromParent();
@@ -989,10 +982,17 @@ static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
}
+static void addToWorklist(Instruction &I,
+ SmallVector<Instruction *, 4> &Worklist) {
+ if (I.getOperand(0)->getType()->isVectorTy())
+ scalarize(&I, Worklist);
+ else
+ Worklist.push_back(&I);
+}
+
static bool runImpl(Function &F, const TargetLowering &TLI,
AssumptionCache *AC) {
- SmallVector<Instruction *, 4> Replace;
- SmallVector<Instruction *, 4> ReplaceVector;
+ SmallVector<Instruction *, 4> Worklist;
bool Modified = false;
unsigned MaxLegalFpConvertBitWidth =
@@ -1003,55 +1003,39 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
return false;
- for (auto &I : instructions(F)) {
- switch (I.getOpcode()) {
- case Instruction::FRem: {
- Type *Ty = I.getType();
- // TODO: This pass doesn't handle scalable vectors.
- if (Ty->isScalableTy())
- continue;
-
- if (targetSupportsFrem(TLI, Ty) ||
- !FRemExpander::canExpandType(Ty->getScalarType()))
- continue;
-
- Replace.push_back(&I);
- Modified = true;
+ for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
+ Instruction &I = *It++;
+ Type *Ty = I.getType();
+ // TODO: This pass doesn't handle scalable vectors.
+ if (Ty->isScalableTy())
+ continue;
+ switch (I.getOpcode()) {
+ case Instruction::FRem:
+ if (!targetSupportsFrem(TLI, Ty) &&
+ FRemExpander::canExpandType(Ty->getScalarType())) {
+ addToWorklist(I, Worklist);
+ Modified = true;
+ }
break;
- }
case Instruction::FPToUI:
case Instruction::FPToSI: {
- // TODO: This pass doesn't handle scalable vectors.
- if (I.getOperand(0)->getType()->isScalableTy())
- continue;
-
- auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
+ auto *IntTy = cast<IntegerType>(Ty->getScalarType());
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
continue;
- if (I.getOperand(0)->getType()->isVectorTy())
- ReplaceVector.push_back(&I);
- else
- Replace.push_back(&I);
+ addToWorklist(I, Worklist);
Modified = true;
break;
}
case Instruction::UIToFP:
case Instruction::SIToFP: {
- // TODO: This pass doesn't handle scalable vectors.
- if (I.getOperand(0)->getType()->isScalableTy())
- continue;
-
auto *IntTy =
cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
continue;
- if (I.getOperand(0)->getType()->isVectorTy())
- ReplaceVector.push_back(&I);
- else
- Replace.push_back(&I);
+ addToWorklist(I, Worklist);
Modified = true;
break;
}
@@ -1060,16 +1044,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
}
}
- while (!ReplaceVector.empty()) {
- Instruction *I = ReplaceVector.pop_back_val();
- scalarize(I, Replace);
- }
-
- if (Replace.empty())
- return false;
-
- while (!Replace.empty()) {
- Instruction *I = Replace.pop_back_val();
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
if (I->getOpcode() == Instruction::FRem) {
auto SQ = [&]() -> std::optional<SimplifyQuery> {
if (AC) {
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 90c60d4..3812823 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -1975,6 +1975,44 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
break;
}
+ case TargetOpcode::G_SUB: {
+ Register Src2 = MI.getOperand(2).getReg();
+ unsigned Src2NumSignBits =
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if (Src2NumSignBits == 1)
+ return 1; // Early out.
+
+ // Handle NEG.
+ Register Src1 = MI.getOperand(1).getReg();
+ KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
+ if (Known1.isZero()) {
+ KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known2.Zero | 1).isAllOnes())
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has, at worst, the same number of sign bits as
+ // the input.
+ if (Known2.isNonNegative()) {
+ FirstAnswer = Src2NumSignBits;
+ break;
+ }
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ unsigned Src1NumSignBits =
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Src1NumSignBits == 1)
+ return 1; // Early Out.
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
+ break;
+ }
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
bool IsFP = Opcode == TargetOpcode::G_FCMP;
diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
index 47640c4a..81ab317 100644
--- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
+++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp
@@ -587,16 +587,12 @@ public:
} // namespace
char GlobalMergeFuncPassWrapper::ID = 0;
-INITIALIZE_PASS_BEGIN(GlobalMergeFuncPassWrapper, "global-merge-func",
- "Global merge function pass", false, false)
-INITIALIZE_PASS_END(GlobalMergeFuncPassWrapper, "global-merge-func",
- "Global merge function pass", false, false)
+INITIALIZE_PASS(GlobalMergeFuncPassWrapper, "global-merge-func",
+ "Global merge function pass", false, false)
-namespace llvm {
-ModulePass *createGlobalMergeFuncPass() {
+ModulePass *llvm::createGlobalMergeFuncPass() {
return new GlobalMergeFuncPassWrapper();
}
-} // namespace llvm
GlobalMergeFuncPassWrapper::GlobalMergeFuncPassWrapper() : ModulePass(ID) {
initializeGlobalMergeFuncPassWrapperPass(
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 3485a27..0e38017 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -101,15 +101,11 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
static bool EnablePrecomputePhysRegs = false;
#endif // NDEBUG
-namespace llvm {
-
-cl::opt<bool> UseSegmentSetForPhysRegs(
+cl::opt<bool> llvm::UseSegmentSetForPhysRegs(
"use-segment-set-for-physregs", cl::Hidden, cl::init(true),
cl::desc(
"Use segment set for the computation of the live ranges of physregs."));
-} // end namespace llvm
-
void LiveIntervalsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<LiveVariablesWrapperPass>();
diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp
index e859765..5c78d98 100644
--- a/llvm/lib/CodeGen/MIR2Vec.cpp
+++ b/llvm/lib/CodeGen/MIR2Vec.cpp
@@ -29,20 +29,17 @@ using namespace mir2vec;
STATISTIC(MIRVocabMissCounter,
"Number of lookups to MIR entities not present in the vocabulary");
-namespace llvm {
-namespace mir2vec {
-cl::OptionCategory MIR2VecCategory("MIR2Vec Options");
+cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options");
// FIXME: Use a default vocab when not specified
static cl::opt<std::string>
VocabFile("mir2vec-vocab-path", cl::Optional,
cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""),
cl::cat(MIR2VecCategory));
-cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
- cl::desc("Weight for machine opcode embeddings"),
- cl::cat(MIR2VecCategory));
-} // namespace mir2vec
-} // namespace llvm
+cl::opt<float>
+ llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
+ cl::desc("Weight for machine opcode embeddings"),
+ cl::cat(MIR2VecCategory));
//===----------------------------------------------------------------------===//
// Vocabulary Implementation
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index f5146f5..d988a2a 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -40,7 +40,7 @@ cl::opt<bool> ImprovedFSDiscriminator(
"improved-fs-discriminator", cl::Hidden, cl::init(false),
cl::desc("New FS discriminators encoding (incompatible with the original "
"encoding)"));
-}
+} // namespace llvm
char MIRAddFSDiscriminators::ID = 0;
diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp
index bc65700..cbf8867 100644
--- a/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -23,10 +23,6 @@
using namespace llvm;
-namespace llvm {
-extern char &MIRNamerID;
-} // namespace llvm
-
#define DEBUG_TYPE "mir-namer"
namespace {
@@ -53,10 +49,9 @@ public:
VRegRenamer Renamer(MF.getRegInfo());
- unsigned BBIndex = 0;
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
- for (auto &MBB : RPOT)
- Changed |= Renamer.renameVRegs(MBB, BBIndex++);
+ for (const auto &[BBIndex, MBB] : enumerate(RPOT))
+ Changed |= Renamer.renameVRegs(MBB, BBIndex);
return Changed;
}
@@ -66,10 +61,4 @@ public:
char MIRNamer::ID;
-char &llvm::MIRNamerID = MIRNamer::ID;
-
-INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false,
- false)
-
-INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false,
- false)
+INITIALIZE_PASS(MIRNamer, "mir-namer", "Rename Register Operands", false, false)
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index bf8a6cd..96428cd 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -107,10 +107,8 @@ struct MFPrintState {
} // end anonymous namespace
-namespace llvm::yaml {
-
/// This struct serializes the LLVM IR module.
-template <> struct BlockScalarTraits<Module> {
+template <> struct yaml::BlockScalarTraits<Module> {
static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
Mod.print(OS, nullptr);
}
@@ -121,8 +119,6 @@ template <> struct BlockScalarTraits<Module> {
}
};
-} // end namespace llvm::yaml
-
static void printRegMIR(Register Reg, yaml::StringValue &Dest,
const TargetRegisterInfo *TRI) {
raw_string_ostream OS(Dest.Value);
diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
index b2731b69..a72c2c4 100644
--- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp
@@ -97,7 +97,9 @@ static const bool EnableDevelopmentFeatures = false;
/// this happens only in development mode. It's a no-op otherwise.
namespace llvm {
extern cl::opt<unsigned> EvictInterferenceCutoff;
+} // namespace llvm
+namespace {
class RegAllocScoring : public MachineFunctionPass {
public:
static char ID;
@@ -124,11 +126,12 @@ public:
/// Performs this pass
bool runOnMachineFunction(MachineFunction &) override;
};
+} // namespace
char RegAllocScoring::ID = 0;
-FunctionPass *createRegAllocScoringPass() { return new RegAllocScoring(); }
-
-} // namespace llvm
+FunctionPass *llvm::createRegAllocScoringPass() {
+ return new RegAllocScoring();
+}
INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
"Register Allocation Scoring Pass", false, false)
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index e7fa082..26eb10f 100644
--- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -29,7 +29,6 @@ using namespace llvm;
#define DEBUG_TYPE "machine-block-freq"
-namespace llvm {
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"view-machine-block-freq-propagation-dags", cl::Hidden,
cl::desc("Pop up a window to show a dag displaying how machine block "
@@ -44,6 +43,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
clEnumValN(GVDT_Count, "count", "display a graph using the real "
"profile count if available.")));
+namespace llvm {
// Similar option above, but used to control BFI display only after MBP pass
cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
"view-block-layout-with-bfi", cl::Hidden,
@@ -69,15 +69,15 @@ extern cl::opt<std::string> ViewBlockFreqFuncName;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
extern cl::opt<unsigned> ViewHotFreqPercent;
-static cl::opt<bool> PrintMachineBlockFreq(
- "print-machine-bfi", cl::init(false), cl::Hidden,
- cl::desc("Print the machine block frequency info."));
-
// Command line option to specify the name of the function for block frequency
// dump. Defined in Analysis/BlockFrequencyInfo.cpp.
extern cl::opt<std::string> PrintBFIFuncName;
} // namespace llvm
+static cl::opt<bool>
+ PrintMachineBlockFreq("print-machine-bfi", cl::init(false), cl::Hidden,
+ cl::desc("Print the machine block frequency info."));
+
static GVDAGType getGVDT() {
if (ViewBlockLayoutWithBFI != GVDT_None)
return ViewBlockLayoutWithBFI;
@@ -85,9 +85,7 @@ static GVDAGType getGVDT() {
return ViewMachineBlockFreqPropagationDAG;
}
-namespace llvm {
-
-template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
+template <> struct llvm::GraphTraits<MachineBlockFrequencyInfo *> {
using NodeRef = const MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
@@ -116,7 +114,7 @@ using MBFIDOTGraphTraitsBase =
MachineBranchProbabilityInfo>;
template <>
-struct DOTGraphTraits<MachineBlockFrequencyInfo *>
+struct llvm::DOTGraphTraits<MachineBlockFrequencyInfo *>
: public MBFIDOTGraphTraitsBase {
const MachineFunction *CurFunc = nullptr;
DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
@@ -159,8 +157,6 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
}
};
-} // end namespace llvm
-
AnalysisKey MachineBlockFrequencyAnalysis::Key;
MachineBlockFrequencyAnalysis::Result
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 2e92dd8..7ca4582 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -18,13 +18,8 @@
using namespace llvm;
-INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass,
- "machine-branch-prob",
- "Machine Branch Probability Analysis", false, true)
-INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass,
- "machine-branch-prob",
- "Machine Branch Probability Analysis", false, true)
-
+INITIALIZE_PASS(MachineBranchProbabilityInfoWrapperPass, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
namespace llvm {
cl::opt<unsigned>
StaticLikelyProb("static-likely-prob",
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 224231c..bfa5ab2 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -719,43 +719,41 @@ MachineFunction::CallSiteInfo::CallSiteInfo(const CallBase &CB) {
}
}
-namespace llvm {
+template <>
+struct llvm::DOTGraphTraits<const MachineFunction *>
+ : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
- template<>
- struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
- DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+ static std::string getGraphName(const MachineFunction *F) {
+ return ("CFG for '" + F->getName() + "' function").str();
+ }
- static std::string getGraphName(const MachineFunction *F) {
- return ("CFG for '" + F->getName() + "' function").str();
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << printMBBReference(*Node);
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
}
- std::string getNodeLabel(const MachineBasicBlock *Node,
- const MachineFunction *Graph) {
- std::string OutStr;
- {
- raw_string_ostream OSS(OutStr);
-
- if (isSimple()) {
- OSS << printMBBReference(*Node);
- if (const BasicBlock *BB = Node->getBasicBlock())
- OSS << ": " << BB->getName();
- } else
- Node->print(OSS);
- }
-
- if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
-
- // Process string output to make it nicer...
- for (unsigned i = 0; i != OutStr.length(); ++i)
- if (OutStr[i] == '\n') { // Left justify
- OutStr[i] = '\\';
- OutStr.insert(OutStr.begin()+i+1, 'l');
- }
- return OutStr;
- }
- };
+ if (OutStr[0] == '\n')
+ OutStr.erase(OutStr.begin());
-} // end namespace llvm
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin() + i + 1, 'l');
+ }
+ return OutStr;
+ }
+};
void MachineFunction::viewCFG() const
{
diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0f88a7b..5111322 100644
--- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -60,13 +60,11 @@ char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
"Machine Function Printer", false, false)
-namespace llvm {
/// Returns a newly-created MachineFunction Printer pass. The
/// default banner is empty.
///
-MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
- const std::string &Banner){
+MachineFunctionPass *
+llvm::createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner) {
return new MachineFunctionPrinterPass(OS, Banner);
}
-
-}
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index fdae3b4..9feb974 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -593,15 +593,12 @@ struct MachineOutliner : public ModulePass {
char MachineOutliner::ID = 0;
-namespace llvm {
-ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
+ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
MachineOutliner *OL = new MachineOutliner();
OL->RunOutlinerMode = RunOutlinerMode;
return OL;
}
-} // namespace llvm
-
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
false)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 89ed4da..a717d9e 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -201,16 +201,15 @@ static cl::opt<unsigned> SwpMaxNumStores(
cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden,
cl::init(200));
-namespace llvm {
-
// A command line option to enable the CopyToPhi DAG mutation.
-cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
- cl::init(true),
- cl::desc("Enable CopyToPhi DAG Mutation"));
+cl::opt<bool>
+ llvm::SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true),
+ cl::desc("Enable CopyToPhi DAG Mutation"));
/// A command line argument to force pipeliner to use specified issue
/// width.
-cl::opt<int> SwpForceIssueWidth(
+cl::opt<int> llvm::SwpForceIssueWidth(
"pipeliner-force-issue-width",
cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
cl::init(-1));
@@ -226,8 +225,6 @@ static cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
clEnumValN(WindowSchedulingFlag::WS_Force, "force",
"Use window algorithm instead of SMS algorithm.")));
-} // end namespace llvm
-
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
char MachinePipeliner::ID = 0;
#ifndef NDEBUG
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 299bcc4..3ed1045 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -176,9 +176,7 @@ STATISTIC(NumNodeOrderPostRA,
STATISTIC(NumFirstValidPostRA,
"Number of scheduling units chosen for FirstValid heuristic post-RA");
-namespace llvm {
-
-cl::opt<MISched::Direction> PreRADirection(
+cl::opt<MISched::Direction> llvm::PreRADirection(
"misched-prera-direction", cl::Hidden,
cl::desc("Pre reg-alloc list scheduling direction"),
cl::init(MISched::Unspecified),
@@ -206,33 +204,31 @@ static cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
-cl::opt<bool> VerifyScheduling(
+cl::opt<bool> llvm::VerifyScheduling(
"verify-misched", cl::Hidden,
cl::desc("Verify machine instrs before and after machine scheduling"));
#ifndef NDEBUG
-cl::opt<bool> ViewMISchedDAGs(
+cl::opt<bool> llvm::ViewMISchedDAGs(
"view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
-cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
- cl::desc("Print schedule DAGs"));
-cl::opt<bool> MISchedDumpReservedCycles(
+cl::opt<bool> llvm::PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
+static cl::opt<bool> MISchedDumpReservedCycles(
"misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
cl::desc("Dump resource usage at schedule boundary."));
-cl::opt<bool> MischedDetailResourceBooking(
+static cl::opt<bool> MischedDetailResourceBooking(
"misched-detail-resource-booking", cl::Hidden, cl::init(false),
cl::desc("Show details of invoking getNextResoufceCycle."));
#else
-const bool ViewMISchedDAGs = false;
-const bool PrintDAGs = false;
-const bool MischedDetailResourceBooking = false;
+const bool llvm::ViewMISchedDAGs = false;
+const bool llvm::PrintDAGs = false;
+static const bool MischedDetailResourceBooking = false;
#ifdef LLVM_ENABLE_DUMP
-const bool MISchedDumpReservedCycles = false;
+static const bool MISchedDumpReservedCycles = false;
#endif // LLVM_ENABLE_DUMP
#endif // NDEBUG
-} // end namespace llvm
-
#ifndef NDEBUG
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph, provide a cutoff to hide them.
@@ -2053,28 +2049,24 @@ public:
} // end anonymous namespace
-namespace llvm {
-
std::unique_ptr<ScheduleDAGMutation>
-createLoadClusterDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- bool ReorderWhileClustering) {
+llvm::createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ bool ReorderWhileClustering) {
return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
TII, TRI, ReorderWhileClustering)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
-createStoreClusterDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- bool ReorderWhileClustering) {
+llvm::createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ bool ReorderWhileClustering) {
return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
TII, TRI, ReorderWhileClustering)
: nullptr;
}
-} // end namespace llvm
-
// Sorting all the loads/stores first, then for each load/store, checking the
// following load/store one by one, until reach the first non-dependent one and
// call target hook to see if they can cluster.
@@ -2304,16 +2296,12 @@ protected:
} // end anonymous namespace
-namespace llvm {
-
std::unique_ptr<ScheduleDAGMutation>
-createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
+llvm::createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
return std::make_unique<CopyConstrain>(TII, TRI);
}
-} // end namespace llvm
-
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0: = dst
@@ -3445,14 +3433,13 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
}
#endif
-namespace llvm {
/// Return true if this heuristic determines order.
/// TODO: Consider refactor return type of these functions as integer or enum,
/// as we may need to differentiate whether TryCand is better than Cand.
-bool tryLess(int TryVal, int CandVal,
- GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason) {
+bool llvm::tryLess(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
if (TryVal < CandVal) {
TryCand.Reason = Reason;
return true;
@@ -3465,10 +3452,10 @@ bool tryLess(int TryVal, int CandVal,
return false;
}
-bool tryGreater(int TryVal, int CandVal,
- GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason) {
+bool llvm::tryGreater(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
if (TryVal > CandVal) {
TryCand.Reason = Reason;
return true;
@@ -3481,9 +3468,9 @@ bool tryGreater(int TryVal, int CandVal,
return false;
}
-bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- SchedBoundary &Zone) {
+bool llvm::tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ SchedBoundary &Zone) {
if (Zone.isTop()) {
// Prefer the candidate with the lesser depth, but only if one of them has
// depth greater than the total latency scheduled so far, otherwise either
@@ -3513,7 +3500,6 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
}
return false;
}
-} // end namespace llvm
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop,
bool IsPostRA = false) {
@@ -3798,14 +3784,12 @@ void GenericScheduler::registerRoots() {
}
}
-namespace llvm {
-bool tryPressure(const PressureChange &TryP,
- const PressureChange &CandP,
- GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) {
+bool llvm::tryPressure(const PressureChange &TryP, const PressureChange &CandP,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) {
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
@@ -3838,7 +3822,7 @@ bool tryPressure(const PressureChange &TryP,
return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
}
-unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+unsigned llvm::getWeakLeft(const SUnit *SU, bool isTop) {
return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}
@@ -3849,7 +3833,7 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) {
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
-int biasPhysReg(const SUnit *SU, bool isTop) {
+int llvm::biasPhysReg(const SUnit *SU, bool isTop) {
const MachineInstr *MI = SU->getInstr();
if (MI->isCopy()) {
@@ -3884,7 +3868,6 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
return 0;
}
-} // end namespace llvm
void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop,
@@ -4812,13 +4795,13 @@ static MachineSchedRegistry ShufflerRegistry(
//===----------------------------------------------------------------------===//
#ifndef NDEBUG
-namespace llvm {
-template<> struct GraphTraits<
- ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+template <>
+struct llvm::GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> {
+};
-template<>
-struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+template <>
+struct llvm::DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
@@ -4878,7 +4861,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
}
};
-} // end namespace llvm
#endif // NDEBUG
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index c2d4aa0..9ac3f741 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -485,10 +485,7 @@ struct LoopBounds {
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
-namespace llvm {
-
-template<>
-class po_iterator_storage<LoopBounds, true> {
+template <> class llvm::po_iterator_storage<LoopBounds, true> {
LoopBounds &LB;
public:
@@ -519,8 +516,6 @@ public:
}
};
-} // end namespace llvm
-
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index 087ac62..59c587c 100644
--- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -9,7 +9,7 @@
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/ADT/STLExtras.h"
-namespace llvm {
+using namespace llvm;
DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
auto I = Strings.try_emplace(S);
@@ -43,5 +43,3 @@ NonRelocatableStringpool::getEntriesForEmission() const {
});
return Result;
}
-
-} // namespace llvm
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 6f373a5..e9ffa85 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -76,8 +76,6 @@ using namespace llvm::safestack;
#define DEBUG_TYPE "safe-stack"
-namespace llvm {
-
STATISTIC(NumFunctions, "Total number of functions");
STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack");
STATISTIC(NumUnsafeStackRestorePointsFunctions,
@@ -89,8 +87,6 @@ STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
-} // namespace llvm
-
/// Use __safestack_pointer_address even if the platform has a faster way of
/// access safe stack pointer.
static cl::opt<bool>
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index eae2e8c..3268c26 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1551,14 +1551,10 @@ LLVM_DUMP_METHOD void ILPValue::dump() const {
dbgs() << *this << '\n';
}
-namespace llvm {
-
LLVM_ATTRIBUTE_UNUSED
-raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
}
-} // end namespace llvm
-
#endif
diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index e7b1494..c80eade 100644
--- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -16,57 +16,51 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace llvm {
- template<>
- struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+template <>
+struct llvm::DOTGraphTraits<ScheduleDAG *> : public DefaultDOTGraphTraits {
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
- static std::string getGraphName(const ScheduleDAG *G) {
- return std::string(G->MF.getName());
- }
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return std::string(G->MF.getName());
+ }
- static bool renderGraphFromBottomUp() {
- return true;
- }
+ static bool renderGraphFromBottomUp() { return true; }
- static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
- return (Node->NumPreds > 10 || Node->NumSuccs > 10);
- }
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
- static std::string getNodeIdentifierLabel(const SUnit *Node,
- const ScheduleDAG *Graph) {
- std::string R;
- raw_string_ostream OS(R);
- OS << static_cast<const void *>(Node);
- return R;
- }
+ static std::string getNodeIdentifierLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+ OS << static_cast<const void *>(Node);
+ return R;
+ }
- /// If you want to override the dot attributes printed for a particular
- /// edge, override this method.
- static std::string getEdgeAttributes(const SUnit *Node,
- SUnitIterator EI,
- const ScheduleDAG *Graph) {
- if (EI.isArtificialDep())
- return "color=cyan,style=dashed";
- if (EI.isCtrlDep())
- return "color=blue,style=dashed";
- return "";
- }
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+ std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
- std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph);
- static std::string getNodeAttributes(const SUnit *N,
- const ScheduleDAG *Graph) {
- return "shape=Mrecord";
- }
-
- static void addCustomGraphFeatures(ScheduleDAG *G,
- GraphWriter<ScheduleDAG*> &GW) {
- return G->addCustomGraphFeatures(GW);
- }
- };
-}
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG *> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+};
std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
const ScheduleDAG *G) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b1accdd..e153842 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -509,6 +509,7 @@ namespace {
SDValue visitFMUL(SDNode *N);
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
SDValue visitFMAD(SDNode *N);
+ SDValue visitFMULADD(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
case ISD::FMAD: return visitFMAD(N);
+ case ISD::FMULADD: return visitFMULADD(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -18444,6 +18446,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMULADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Constant fold FMULADD.
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
+ return C;
+
+ return SDValue();
+}
+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 08af74c..4512c5c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5786,6 +5786,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FCOPYSIGN:
case ISD::FMA:
case ISD::FMAD:
+ case ISD::FMULADD:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
@@ -5904,6 +5905,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
case ISD::FCOSH:
case ISD::FTANH:
case ISD::FMA:
+ case ISD::FMULADD:
case ISD::FMAD: {
if (SNaN)
return true;
@@ -7231,7 +7233,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// Handle fma/fmad special cases.
- if (Opcode == ISD::FMA || Opcode == ISD::FMAD) {
+ if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[2].getValueType() == VT && "FMA types must match!");
@@ -7242,7 +7244,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
APFloat V1 = C1->getValueAPF();
const APFloat &V2 = C2->getValueAPF();
const APFloat &V3 = C3->getValueAPF();
- if (Opcode == ISD::FMAD) {
+ if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) {
V1.multiply(V2, APFloat::rmNearestTiesToEven);
V1.add(V3, APFloat::rmNearestTiesToEven);
} else
@@ -11844,25 +11846,38 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops,
+ bool AllowCommute) {
SDNodeFlags Flags;
if (Inserter)
Flags = Inserter->getFlags();
- return getNodeIfExists(Opcode, VTList, Ops, Flags);
+ return getNodeIfExists(Opcode, VTList, Ops, Flags, AllowCommute);
}
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags) {
- if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ const SDNodeFlags Flags,
+ bool AllowCommute) {
+ if (VTList.VTs[VTList.NumVTs - 1] == MVT::Glue)
+ return nullptr;
+
+ auto Lookup = [&](ArrayRef<SDValue> LookupOps) -> SDNode * {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTList, Ops);
+ AddNodeIDNode(ID, Opcode, VTList, LookupOps);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP)) {
E->intersectFlagsWith(Flags);
return E;
}
- }
+ return nullptr;
+ };
+
+ if (SDNode *Existing = Lookup(Ops))
+ return Existing;
+
+ if (AllowCommute && TLI->isCommutativeBinOp(Opcode))
+ return Lookup({Ops[1], Ops[0]});
+
return nullptr;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c21890a..0f2b518 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6996,6 +6996,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
+ } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) {
+ // TODO: Support splitting the vector.
+ setValue(&I, DAG.getNode(ISD::FMULADD, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fcfbfe6..39cbfad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMA: return "fma";
case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
+ case ISD::FMULADD: return "fmuladd";
case ISD::FREM: return "frem";
case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cc503d3..920dff9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
break;
}
case ISD::FMA:
+ case ISD::FMULADD:
case ISD::FMAD: {
if (!Flags.hasNoSignedZeros())
break;
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 64e5cd5..95a9c3f 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -306,10 +306,7 @@ char &llvm::StackFrameLayoutAnalysisPassID = StackFrameLayoutAnalysisLegacy::ID;
INITIALIZE_PASS(StackFrameLayoutAnalysisLegacy, "stack-frame-layout",
"Stack Frame Layout", false, false)
-namespace llvm {
/// Returns a newly-created StackFrameLayout pass.
-MachineFunctionPass *createStackFrameLayoutAnalysisPass() {
+MachineFunctionPass *llvm::createStackFrameLayoutAnalysisPass() {
return new StackFrameLayoutAnalysisLegacy();
}
-
-} // namespace llvm
diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
index 53a9ab4..eac20120 100644
--- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp
+++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
@@ -75,22 +75,11 @@ bool StaticDataAnnotator::runOnModule(Module &M) {
bool Changed = false;
for (auto &GV : M.globals()) {
- if (GV.isDeclarationForLinker())
+ if (!llvm::memprof::IsAnnotationOK(GV))
continue;
- // The implementation below assumes prior passes don't set section prefixes,
- // and specifically do 'assign' rather than 'update'. So report error if a
- // section prefix is already set.
- if (auto maybeSectionPrefix = GV.getSectionPrefix();
- maybeSectionPrefix && !maybeSectionPrefix->empty())
- llvm::report_fatal_error("Global variable " + GV.getName() +
- " already has a section prefix " +
- *maybeSectionPrefix);
-
StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI);
- if (SectionPrefix.empty())
- continue;
-
+ // setSectionPrefix returns true if the section prefix is updated.
Changed |= GV.setSectionPrefix(SectionPrefix);
}
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e22dc25..1593a40 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -130,10 +130,8 @@ StaticDataSplitter::getConstant(const MachineOperand &Op,
if (Op.isGlobal()) {
// Find global variables with local linkage.
const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
- // Skip 'llvm.'-prefixed global variables conservatively because they are
- // often handled specially, and skip those not in static data
- // sections.
- if (!GV || GV->getName().starts_with("llvm.") ||
+ // Skip those not eligible for annotation or not in static data sections.
+ if (!GV || !llvm::memprof::IsAnnotationOK(*GV) ||
!inStaticDataSection(*GV, TM))
return nullptr;
return GV;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index c23281a..060b1dd 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -815,7 +815,8 @@ void TargetLoweringBase::initActions() {
ISD::FTAN, ISD::FACOS,
ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH,
- ISD::FTANH, ISD::FATAN2},
+ ISD::FTANH, ISD::FATAN2,
+ ISD::FMULADD},
VT, Expand);
// Overflow operations default to expand
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index c9e4618..971f822 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -102,10 +102,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
return true;
}
-namespace llvm {
-
-Printable printReg(Register Reg, const TargetRegisterInfo *TRI,
- unsigned SubIdx, const MachineRegisterInfo *MRI) {
+Printable llvm::printReg(Register Reg, const TargetRegisterInfo *TRI,
+ unsigned SubIdx, const MachineRegisterInfo *MRI) {
return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) {
if (!Reg)
OS << "$noreg";
@@ -135,7 +133,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI,
});
}
-Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+Printable llvm::printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
return Printable([Unit, TRI](raw_ostream &OS) {
// Generic printout when TRI is missing.
if (!TRI) {
@@ -158,7 +156,7 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+Printable llvm::printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
return Printable([Unit, TRI](raw_ostream &OS) {
if (Register::isVirtualRegister(Unit)) {
OS << '%' << Register(Unit).virtRegIndex();
@@ -168,8 +166,9 @@ Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo,
- const TargetRegisterInfo *TRI) {
+Printable llvm::printRegClassOrBank(Register Reg,
+ const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) {
if (RegInfo.getRegClassOrNull(Reg))
OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
@@ -183,8 +182,6 @@ Printable printRegClassOrBank(Register Reg, const MachineRegisterInfo &RegInfo,
});
}
-} // end namespace llvm
-
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is alloctable, or NULL.
const TargetRegisterClass *
diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp
index 33734b8..bb8d2cb 100644
--- a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp
@@ -90,7 +90,7 @@ void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G,
auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize;
Seg.Addr = NextSegAddr;
- Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize);
+ Seg.WorkingMem = Mapper->prepare(G, NextSegAddr, TotalSize);
NextSegAddr += alignTo(TotalSize, Mapper->getPageSize());
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index ea3b22a..7b327af 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -58,7 +58,8 @@ void InProcessMemoryMapper::reserve(size_t NumBytes,
ExecutorAddrRange(ExecutorAddr::fromPtr(MB.base()), MB.allocatedSize()));
}
-char *InProcessMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) {
+char *InProcessMemoryMapper::prepare(jitlink::LinkGraph &G, ExecutorAddr Addr,
+ size_t ContentSize) {
return Addr.toPtr<char *>();
}
@@ -324,7 +325,8 @@ void SharedMemoryMapper::reserve(size_t NumBytes,
#endif
}
-char *SharedMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) {
+char *SharedMemoryMapper::prepare(jitlink::LinkGraph &G, ExecutorAddr Addr,
+ size_t ContentSize) {
auto R = Reservations.upper_bound(Addr);
assert(R != Reservations.begin() && "Attempt to prepare unreserved range");
R--;
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
index 51d2e21..5b87686 100644
--- a/llvm/lib/IR/ConstantFPRange.cpp
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -8,6 +8,7 @@
#include "llvm/IR/ConstantFPRange.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -506,3 +507,168 @@ ConstantFPRange ConstantFPRange::sub(const ConstantFPRange &Other) const {
// fsub X, Y = fadd X, (fneg Y)
return add(Other.negate());
}
+
+void ConstantFPRange::flushDenormals(DenormalMode::DenormalModeKind Mode) {
+ if (Mode == DenormalMode::IEEE)
+ return;
+ FPClassTest Class = classify();
+ if (!(Class & fcSubnormal))
+ return;
+
+ auto &Sem = getSemantics();
+ // PreserveSign: PosSubnormal -> PosZero, NegSubnormal -> NegZero
+ // PositiveZero: PosSubnormal -> PosZero, NegSubnormal -> PosZero
+ // Dynamic: PosSubnormal -> PosZero, NegSubnormal -> NegZero/PosZero
+ bool ZeroLowerNegative =
+ Mode != DenormalMode::PositiveZero && (Class & fcNegSubnormal);
+ bool ZeroUpperNegative =
+ Mode == DenormalMode::PreserveSign && !(Class & fcPosSubnormal);
+ assert((ZeroLowerNegative || !ZeroUpperNegative) &&
+ "ZeroLower is greater than ZeroUpper.");
+ Lower = minnum(Lower, APFloat::getZero(Sem, ZeroLowerNegative));
+ Upper = maxnum(Upper, APFloat::getZero(Sem, ZeroUpperNegative));
+}
+
+/// Represent a contiguous range of values sharing the same sign.
+struct SameSignRange {
+ bool HasZero;
+ bool HasNonZero;
+ bool HasInf;
+ // The lower and upper bounds of the range (inclusive).
+ // The sign is dropped and infinities are excluded.
+ std::optional<std::pair<APFloat, APFloat>> FinitePart;
+
+ explicit SameSignRange(const APFloat &Lower, const APFloat &Upper)
+ : HasZero(Lower.isZero()), HasNonZero(!Upper.isZero()),
+ HasInf(Upper.isInfinity()) {
+ assert(!Lower.isNegative() && !Upper.isNegative() &&
+ "The sign should be dropped.");
+ assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan &&
+ "Empty set.");
+ if (!Lower.isInfinity())
+ FinitePart = {Lower,
+ HasInf ? APFloat::getLargest(Lower.getSemantics()) : Upper};
+ }
+};
+
+/// Split the range into positive and negative components.
+static void splitPosNeg(const APFloat &Lower, const APFloat &Upper,
+ std::optional<SameSignRange> &NegPart,
+ std::optional<SameSignRange> &PosPart) {
+ assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan &&
+ "Non-NaN part is empty.");
+ if (Lower.isNegative() == Upper.isNegative()) {
+ if (Lower.isNegative())
+ NegPart = SameSignRange{abs(Upper), abs(Lower)};
+ else
+ PosPart = SameSignRange{Lower, Upper};
+ return;
+ }
+ auto &Sem = Lower.getSemantics();
+ NegPart = SameSignRange{APFloat::getZero(Sem), abs(Lower)};
+ PosPart = SameSignRange{APFloat::getZero(Sem), Upper};
+}
+
+ConstantFPRange ConstantFPRange::mul(const ConstantFPRange &Other) const {
+ auto &Sem = getSemantics();
+ bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) ||
+ ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet());
+ if (isNaNOnly() || Other.isNaNOnly())
+ return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN,
+ /*MayBeSNaN=*/false);
+ std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos;
+ splitPosNeg(Lower, Upper, LHSNeg, LHSPos);
+ splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos);
+ APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false);
+ APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true);
+ auto Update = [&](std::optional<SameSignRange> &LHS,
+ std::optional<SameSignRange> &RHS, bool Negative) {
+ if (!LHS || !RHS)
+ return;
+ // 0 * inf = QNaN
+ ResMayBeQNaN |= LHS->HasZero && RHS->HasInf;
+ ResMayBeQNaN |= RHS->HasZero && LHS->HasInf;
+ // NonZero * inf = inf
+ if ((LHS->HasInf && RHS->HasNonZero) || (RHS->HasInf && LHS->HasNonZero))
+ (Negative ? ResLower : ResUpper) = APFloat::getInf(Sem, Negative);
+ // Finite * Finite
+ if (LHS->FinitePart && RHS->FinitePart) {
+ APFloat NewLower = LHS->FinitePart->first * RHS->FinitePart->first;
+ APFloat NewUpper = LHS->FinitePart->second * RHS->FinitePart->second;
+ if (Negative) {
+ ResLower = minnum(ResLower, -NewUpper);
+ ResUpper = maxnum(ResUpper, -NewLower);
+ } else {
+ ResLower = minnum(ResLower, NewLower);
+ ResUpper = maxnum(ResUpper, NewUpper);
+ }
+ }
+ };
+ Update(LHSNeg, RHSNeg, /*Negative=*/false);
+ Update(LHSNeg, RHSPos, /*Negative=*/true);
+ Update(LHSPos, RHSNeg, /*Negative=*/true);
+ Update(LHSPos, RHSPos, /*Negative=*/false);
+ return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false);
+}
+
+ConstantFPRange ConstantFPRange::div(const ConstantFPRange &Other) const {
+ auto &Sem = getSemantics();
+ bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) ||
+ ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet());
+ if (isNaNOnly() || Other.isNaNOnly())
+ return getNaNOnly(Sem, /*MayBeQNaN=*/ResMayBeQNaN,
+ /*MayBeSNaN=*/false);
+ std::optional<SameSignRange> LHSNeg, LHSPos, RHSNeg, RHSPos;
+ splitPosNeg(Lower, Upper, LHSNeg, LHSPos);
+ splitPosNeg(Other.Lower, Other.Upper, RHSNeg, RHSPos);
+ APFloat ResLower = APFloat::getInf(Sem, /*Negative=*/false);
+ APFloat ResUpper = APFloat::getInf(Sem, /*Negative=*/true);
+ auto Update = [&](std::optional<SameSignRange> &LHS,
+ std::optional<SameSignRange> &RHS, bool Negative) {
+ if (!LHS || !RHS)
+ return;
+ // inf / inf = QNaN 0 / 0 = QNaN
+ ResMayBeQNaN |= LHS->HasInf && RHS->HasInf;
+ ResMayBeQNaN |= LHS->HasZero && RHS->HasZero;
+ // It is not straightforward to infer HasNonZeroFinite = HasFinite &&
+ // HasNonZero. By definitions we have:
+ // HasFinite = HasNonZeroFinite || HasZero
+ // HasNonZero = HasNonZeroFinite || HasInf
+ // Since the range is contiguous, if both HasFinite and HasNonZero are true,
+ // HasNonZeroFinite must be true.
+ bool LHSHasNonZeroFinite = LHS->FinitePart && LHS->HasNonZero;
+ bool RHSHasNonZeroFinite = RHS->FinitePart && RHS->HasNonZero;
+ // inf / Finite = inf FiniteNonZero / 0 = inf
+ if ((LHS->HasInf && RHS->FinitePart) ||
+ (LHSHasNonZeroFinite && RHS->HasZero))
+ (Negative ? ResLower : ResUpper) = APFloat::getInf(Sem, Negative);
+ // Finite / inf = 0
+ if (LHS->FinitePart && RHS->HasInf) {
+ APFloat Zero = APFloat::getZero(Sem, /*Negative=*/Negative);
+ ResLower = minnum(ResLower, Zero);
+ ResUpper = maxnum(ResUpper, Zero);
+ }
+ // Finite / FiniteNonZero
+ if (LHS->FinitePart && RHSHasNonZeroFinite) {
+ assert(!RHS->FinitePart->second.isZero() &&
+ "Divisor should be non-zero.");
+ APFloat NewLower = LHS->FinitePart->first / RHS->FinitePart->second;
+ APFloat NewUpper = LHS->FinitePart->second /
+ (RHS->FinitePart->first.isZero()
+ ? APFloat::getSmallest(Sem, /*Negative=*/false)
+ : RHS->FinitePart->first);
+ if (Negative) {
+ ResLower = minnum(ResLower, -NewUpper);
+ ResUpper = maxnum(ResUpper, -NewLower);
+ } else {
+ ResLower = minnum(ResLower, NewLower);
+ ResUpper = maxnum(ResUpper, NewUpper);
+ }
+ }
+ };
+ Update(LHSNeg, RHSNeg, /*Negative=*/false);
+ Update(LHSNeg, RHSPos, /*Negative=*/true);
+ Update(LHSPos, RHSNeg, /*Negative=*/true);
+ Update(LHSPos, RHSPos, /*Negative=*/false);
+ return ConstantFPRange(ResLower, ResUpper, ResMayBeQNaN, /*MayBeSNaN=*/false);
+}
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 2c2950c..cbce8bd 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -667,8 +667,11 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
if (CE->getOpcode() == Instruction::Sub) {
ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
- if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt &&
- RHS->getOpcode() == Instruction::PtrToInt) {
+ if (LHS && RHS &&
+ (LHS->getOpcode() == Instruction::PtrToInt ||
+ LHS->getOpcode() == Instruction::PtrToAddr) &&
+ (RHS->getOpcode() == Instruction::PtrToInt ||
+ RHS->getOpcode() == Instruction::PtrToAddr)) {
Constant *LHSOp0 = LHS->getOperand(0);
Constant *RHSOp0 = RHS->getOperand(0);
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 3f1cc1e..27d8294 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -4098,15 +4098,8 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
return wrap(unwrap(B)->CreateGlobalString(Str, Name));
}
-LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
- Value *P = unwrap(MemAccessInst);
- if (LoadInst *LI = dyn_cast<LoadInst>(P))
- return LI->isVolatile();
- if (StoreInst *SI = dyn_cast<StoreInst>(P))
- return SI->isVolatile();
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(P))
- return AI->isVolatile();
- return cast<AtomicCmpXchgInst>(P)->isVolatile();
+LLVMBool LLVMGetVolatile(LLVMValueRef Inst) {
+ return cast<Instruction>(unwrap(Inst))->isVolatile();
}
void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 614c3a9..15c0198 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -1002,6 +1003,18 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall(
return C;
}
+Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True,
+ Value *False,
+ StringRef PassName,
+ const Twine &Name) {
+ Value *Ret = CreateSelectFMF(C, True, False, {}, Name);
+ if (auto *SI = dyn_cast<SelectInst>(Ret)) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *SI, *SI->getParent()->getParent(), PassName);
+ }
+ return Ret;
+}
+
Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False,
const Twine &Name, Instruction *MDFrom) {
return CreateSelectFMF(C, True, False, {}, Name, MDFrom);
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 88e7c44..9060a89 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2965,8 +2965,7 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp,
// zext, sext -> zext, because sext can't sign extend after zext
return Instruction::ZExt;
case 11: {
- // inttoptr, ptrtoint/ptrtoaddr -> bitcast if SrcSize<=PtrSize/AddrSize
- // and SrcSize==DstSize
+ // inttoptr, ptrtoint/ptrtoaddr -> integer cast
if (!DL)
return 0;
unsigned MidSize = secondOp == Instruction::PtrToAddr
@@ -2974,10 +2973,15 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp,
: DL->getPointerTypeSizeInBits(MidTy);
unsigned SrcSize = SrcTy->getScalarSizeInBits();
unsigned DstSize = DstTy->getScalarSizeInBits();
- // TODO: Could also produce zext or trunc here.
- if (SrcSize <= MidSize && SrcSize == DstSize)
- return Instruction::BitCast;
- return 0;
+ // If the middle size is smaller than both source and destination,
+ // an additional masking operation would be required.
+ if (MidSize < SrcSize && MidSize < DstSize)
+ return 0;
+ if (DstSize < SrcSize)
+ return Instruction::Trunc;
+ if (DstSize > SrcSize)
+ return Instruction::ZExt;
+ return Instruction::BitCast;
}
case 12:
// addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c9ff86b..c79a950 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -893,7 +893,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (GV.hasInitializer()) {
const Constant *Init = GV.getInitializer();
const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
- Check(InitArray, "wrong initalizer for intrinsic global variable",
+ Check(InitArray, "wrong initializer for intrinsic global variable",
Init);
for (Value *Op : InitArray->operands()) {
Value *V = Op->stripPointerCasts();
diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp
index 6b65720..5ab1def 100644
--- a/llvm/lib/Support/DebugCounter.cpp
+++ b/llvm/lib/Support/DebugCounter.cpp
@@ -136,6 +136,13 @@ struct DebugCounterOwner : DebugCounter {
cl::location(this->ShouldPrintCounter),
cl::init(false),
cl::desc("Print out debug counter info after all counters accumulated")};
+ cl::opt<bool, true> PrintDebugCounterQueries{
+ "print-debug-counter-queries",
+ cl::Hidden,
+ cl::Optional,
+ cl::location(this->ShouldPrintCounterQueries),
+ cl::init(false),
+ cl::desc("Print out each query of an enabled debug counter")};
cl::opt<bool, true> BreakOnLastCount{
"debug-counter-break-on-last",
cl::Hidden,
@@ -221,31 +228,40 @@ void DebugCounter::print(raw_ostream &OS) const {
}
}
+bool DebugCounter::handleCounterIncrement(CounterInfo &Info) {
+ int64_t CurrCount = Info.Count++;
+ uint64_t CurrIdx = Info.CurrChunkIdx;
+
+ if (Info.Chunks.empty())
+ return true;
+ if (CurrIdx >= Info.Chunks.size())
+ return false;
+
+ bool Res = Info.Chunks[CurrIdx].contains(CurrCount);
+ if (BreakOnLast && CurrIdx == (Info.Chunks.size() - 1) &&
+ CurrCount == Info.Chunks[CurrIdx].End) {
+ LLVM_BUILTIN_DEBUGTRAP;
+ }
+ if (CurrCount > Info.Chunks[CurrIdx].End) {
+ Info.CurrChunkIdx++;
+
+ /// Handle consecutive blocks.
+ if (Info.CurrChunkIdx < Info.Chunks.size() &&
+ CurrCount == Info.Chunks[Info.CurrChunkIdx].Begin)
+ return true;
+ }
+ return Res;
+}
+
bool DebugCounter::shouldExecuteImpl(unsigned CounterName) {
auto &Us = instance();
auto Result = Us.Counters.find(CounterName);
if (Result != Us.Counters.end()) {
auto &CounterInfo = Result->second;
- int64_t CurrCount = CounterInfo.Count++;
- uint64_t CurrIdx = CounterInfo.CurrChunkIdx;
-
- if (CounterInfo.Chunks.empty())
- return true;
- if (CurrIdx >= CounterInfo.Chunks.size())
- return false;
-
- bool Res = CounterInfo.Chunks[CurrIdx].contains(CurrCount);
- if (Us.BreakOnLast && CurrIdx == (CounterInfo.Chunks.size() - 1) &&
- CurrCount == CounterInfo.Chunks[CurrIdx].End) {
- LLVM_BUILTIN_DEBUGTRAP;
- }
- if (CurrCount > CounterInfo.Chunks[CurrIdx].End) {
- CounterInfo.CurrChunkIdx++;
-
- /// Handle consecutive blocks.
- if (CounterInfo.CurrChunkIdx < CounterInfo.Chunks.size() &&
- CurrCount == CounterInfo.Chunks[CounterInfo.CurrChunkIdx].Begin)
- return true;
+ bool Res = Us.handleCounterIncrement(CounterInfo);
+ if (Us.ShouldPrintCounterQueries && CounterInfo.IsSet) {
+ dbgs() << "DebugCounter " << Us.RegisteredCounters[CounterName] << "="
+ << (CounterInfo.Count - 1) << (Res ? " execute" : " skip") << "\n";
}
return Res;
}
diff --git a/llvm/lib/Support/VirtualOutputBackends.cpp b/llvm/lib/Support/VirtualOutputBackends.cpp
index d6d7b87..de59b8a 100644
--- a/llvm/lib/Support/VirtualOutputBackends.cpp
+++ b/llvm/lib/Support/VirtualOutputBackends.cpp
@@ -498,7 +498,7 @@ Error OnDiskOutputFile::keep() {
// Someone else owns the lock on this file, wait.
switch (Lock.waitForUnlockFor(std::chrono::seconds(256))) {
case WaitForUnlockResult::Success:
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case WaitForUnlockResult::OwnerDied: {
continue; // try again to get the lock.
}
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 2ea3a24..afce803 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -1363,9 +1363,12 @@ const Init *BinOpInit::Fold(const Record *CurRec) const {
}
case LISTSPLAT: {
const auto *Value = dyn_cast<TypedInit>(LHS);
- const auto *Size = dyn_cast<IntInit>(RHS);
- if (Value && Size) {
- SmallVector<const Init *, 8> Args(Size->getValue(), Value);
+ const auto *Count = dyn_cast<IntInit>(RHS);
+ if (Value && Count) {
+ if (Count->getValue() < 0)
+ PrintFatalError(Twine("!listsplat count ") + Count->getAsString() +
+ " is negative");
+ SmallVector<const Init *, 8> Args(Count->getValue(), Value);
return ListInit::get(Args, Value->getType());
}
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6965116..9926a4d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26196,9 +26196,10 @@ static SDValue performFlagSettingCombine(SDNode *N,
return DCI.CombineTo(N, Res, SDValue(N, 1));
}
- // Combine identical generic nodes into this node, re-using the result.
+ // Combine equivalent generic nodes into this node, re-using the result.
if (SDNode *Generic = DCI.DAG.getNodeIfExists(
- GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
+ GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS},
+ /*AllowCommute=*/true))
DCI.CombineTo(Generic, SDValue(N, 0));
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index f110558..7e03b97 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
}
bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
- // Assume we can't combine the last pop with the sp restore.
- bool CombineAfterCSRBump = false;
+
+ unsigned ProloguePopSize = PrologueSaveSize;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+ // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
+ // that needs to be popped until we reach the start of the SVE save area.
+ // The "FixedObject" stack occurs after the SVE area and must be popped
+ // later.
+ ProloguePopSize -= FixedObject;
AfterCSRPopSize += FixedObject;
- } else if (!CombineSPBump && PrologueSaveSize != 0) {
+ }
+
+ // Assume we can't combine the last pop with the sp restore.
+ if (!CombineSPBump && ProloguePopSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
- AArch64InstrInfo::isSEHInstruction(*Pop))
+ AArch64InstrInfo::isSEHInstruction(*Pop) ||
+ (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
+ isPartOfSVECalleeSaves(Pop)))
Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
@@ -1377,18 +1387,27 @@ void AArch64EpilogueEmitter::emitEpilogue() {
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
- Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
- PrologueSaveSize);
+ Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
+ ProloguePopSize);
+ } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+ MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
+ if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
+ ++AfterLastPop;
+ // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
+ // callee-save non-SVE registers to move the stack pointer to the start of
+ // the SVE area.
+ emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(ProloguePopSize), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
} else {
- // If not, make sure to emit an add after the last ldp.
+ // Otherwise, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
// pops.
- AfterCSRPopSize += PrologueSaveSize;
- CombineAfterCSRBump = true;
+ AfterCSRPopSize += ProloguePopSize;
}
}
-
// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1419,6 +1438,17 @@ void AArch64EpilogueEmitter::emitEpilogue() {
--SEHEpilogueStartI;
}
+ // Determine the ranges of SVE callee-saves. This is done before emitting any
+ // code at the end of the epilogue (for Swift async), which can get in the way
+ // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
+ auto [PPR, ZPR] = getSVEStackFrameSizes();
+ auto [PPRRange, ZPRRange] = partitionSVECS(
+ MBB,
+ SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
+ ? MBB.getFirstTerminator()
+ : FirstGPRRestoreI,
+ PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
+
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
@@ -1441,14 +1471,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
- auto [PPR, ZPR] = getSVEStackFrameSizes();
- auto [PPRRange, ZPRRange] = partitionSVECS(
- MBB,
- SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
- ? MBB.getFirstTerminator()
- : FirstGPRRestoreI,
- PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
-
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset SVEStackSize =
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
@@ -1467,16 +1489,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NeedsWinCFI, &HasWinCFI);
}
- // Deallocate callee-save non-SVE registers.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate fixed objects.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(FixedObject), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
@@ -1619,7 +1631,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
- StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
+ StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 12ddf47..53b00e8 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -273,7 +273,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
EpilogueVectorizationMinVF = 8;
MaxInterleaveFactor = 4;
ScatterOverhead = 13;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case NeoverseN2:
case NeoverseN3:
PrefFunctionAlignment = Align(16);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index dbe74b1..5700468 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
TII->isTRANS(MI)))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
- TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
- Result = true;
+ TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) {
+ // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
+ // For our purposes, these shall not be classified as VALU as this results
+ // in unexpected behavior.
+ Result = !MI.mayLoadOrStore();
+ }
else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
TII->isSALU(MI))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
TII->isMFMAorWMMA(MI))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a44af5f..1b559a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2833,8 +2833,8 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
R = getMad(DAG, DL, VT, YH, CH, Mad1);
}
- const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
- (Flags.hasNoInfs() || Options.NoInfsFPMath);
+ const bool IsFiniteOnly =
+ (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs();
// TODO: Check if known finite from source value.
if (!IsFiniteOnly) {
@@ -3161,9 +3161,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT);
R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R);
- const auto &Options = getTargetMachine().Options;
- if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) {
+ if (!Flags.hasNoInfs()) {
SDValue OverflowCheckConst =
DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
SDValue Overflow =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ee466ca..596a895 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3575,7 +3575,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
const bool IsFiniteOnly =
(MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) &&
- (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath);
+ MI.getFlag(MachineInstr::FmNoInfs);
if (!IsFiniteOnly) {
// Expand isfinite(x) => fabs(x) < inf
@@ -3864,9 +3864,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
R = B.buildSelect(Ty, Underflow, Zero, R);
- const auto &Options = MF.getTarget().Options;
-
- if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) {
+ if (!(Flags & MachineInstr::FmNoInfs)) {
auto OverflowCheckConst =
B.buildFConstant(Ty, IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 71494be..4e11c4f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,6 +14,7 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI) {
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind) {
GCNRPTracker::LiveRegSet LiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
+ if (RegKind != GCNRegPressure::TOTAL_KINDS &&
+ GCNRegPressure::getRegKind(Reg, MRI) != RegKind)
+ continue;
if (!LIS.hasInterval(Reg))
continue;
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
@@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
#undef PFX
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
+ GCNRegPressure::RegKind Kind,
+ LiveIntervals &LIS,
+ const MachineLoopInfo *MLI) {
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ auto &OS = dbgs();
+ const char *RegName = GCNRegPressure::getName(Kind);
+
+ unsigned MaxNumRegs = 0;
+ const MachineInstr *MaxPressureMI = nullptr;
+ GCNUpwardRPTracker RPT(LIS);
+ for (const MachineBasicBlock &MBB : MF) {
+ RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
+ for (const MachineInstr &MI : reverse(MBB)) {
+ RPT.recede(MI);
+ unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
+ if (NumRegs > MaxNumRegs) {
+ MaxNumRegs = NumRegs;
+ MaxPressureMI = &MI;
+ }
+ }
+ }
+
+ SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);
+
+ // Max pressure can occur at either the early-clobber or register slot.
+ // Choose the maximum liveset between both slots. This is ugly but this is
+ // diagnostic code.
+ SlotIndex ECSlot = MISlot.getRegSlot(true);
+ SlotIndex RSlot = MISlot.getRegSlot(false);
+ GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind);
+ GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind);
+ unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind);
+ unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
+ GCNRPTracker::LiveRegSet *LiveSet =
+ ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
+ SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
+ assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);
+
+ // Split live registers into single-def and multi-def sets.
+ GCNRegPressure SDefPressure, MDefPressure;
+ SmallVector<Register, 16> SDefRegs, MDefRegs;
+ for (auto [Reg, LaneMask] : *LiveSet) {
+ assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind);
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (LI.getNumValNums() == 1 ||
+ (LI.hasSubRanges() &&
+ llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) {
+ return SR.getNumValNums() == 1;
+ }))) {
+ SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
+ SDefRegs.push_back(Reg);
+ } else {
+ MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
+ MDefRegs.push_back(Reg);
+ }
+ }
+ unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind);
+ unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind);
+ assert(SDefNumRegs + MDefNumRegs == MaxNumRegs);
+
+ auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) {
+ return Printable([&, MBB, SI](raw_ostream &OS) {
+ OS << SI << ':' << printMBBReference(*MBB);
+ if (MLI)
+ if (const MachineLoop *ML = MLI->getLoopFor(MBB))
+ OS << " (LoopHdr " << printMBBReference(*ML->getHeader())
+ << ", Depth " << ML->getLoopDepth() << ")";
+ });
+ };
+
+ auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) {
+ GCNRegPressure RegPressure;
+ RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI);
+ OS << " " << printReg(Reg, TRI) << ':'
+ << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask "
+ << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' '
+ << RegName << "s)\n";
+
+ // Use std::map to sort def/uses by SlotIndex.
+ std::map<SlotIndex, const MachineInstr *> Instrs;
+ for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) {
+ Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI;
+ }
+
+ for (const auto &[SI, MI] : Instrs) {
+ OS << " ";
+ if (MI->definesRegister(Reg, TRI))
+ OS << "def ";
+ if (MI->readsRegister(Reg, TRI))
+ OS << "use ";
+ OS << printLoc(MI->getParent(), SI) << ": " << *MI;
+ }
+ };
+
+ OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
+ << " ***\n";
+ OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
+ << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
+ << *MaxPressureMI;
+
+ OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
+ << RegName << "s):\n";
+
+ // Sort SDefRegs by number of uses (smallest first)
+ llvm::sort(SDefRegs, [&](Register A, Register B) {
+ return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) <
+ std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end());
+ });
+
+ for (const Register Reg : SDefRegs) {
+ PrintRegInfo(Reg, LiveSet->lookup(Reg));
+ }
+
+ OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' '
+ << RegName << "s):\n";
+ for (const Register Reg : MDefRegs) {
+ PrintRegInfo(Reg, LiveSet->lookup(Reg));
+ }
+}
+#endif
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 898d1ff..979a8b0 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -31,6 +31,12 @@ class SlotIndex;
struct GCNRegPressure {
enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };
+ static constexpr const char *getName(RegKind Kind) {
+ const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"};
+ assert(Kind < TOTAL_KINDS);
+ return Names[Kind];
+ }
+
GCNRegPressure() {
clear();
}
@@ -41,6 +47,11 @@ struct GCNRegPressure {
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
+ unsigned getNumRegs(RegKind Kind) const {
+ assert(Kind < TOTAL_KINDS);
+ return Value[Kind];
+ }
+
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
/// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
@@ -138,6 +149,12 @@ struct GCNRegPressure {
void dump() const;
+ static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ return (RegKind)getRegKind(MRI.getRegClass(Reg), STI);
+ }
+
private:
static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2;
@@ -294,8 +311,10 @@ public:
}
};
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
+GCNRPTracker::LiveRegSet
+getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
@@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
-
/// creates a map MachineInstr -> LiveRegSet
/// R - range of iterators on instructions
/// After - upon entry or exit of every instruction
@@ -524,6 +540,11 @@ public:
}
};
+LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF,
+ GCNRegPressure::RegKind Kind,
+ LiveIntervals &LIS,
+ const MachineLoopInfo *MLI);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index bdc0810..58482ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+#define DUMP_MAX_REG_PRESSURE
+static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
+ "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
+ cl::desc("Print a list of live registers along with their def/uses at the "
+ "point of maximum register pressure before scheduling."),
+ cl::init(false));
+
+static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
+ "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
+ cl::desc("Print a list of live registers along with their def/uses at the "
+ "point of maximum register pressure after scheduling."),
+ cl::init(false));
+#endif
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -960,6 +975,14 @@ void GCNScheduleDAGMILive::runSchedStages() {
RegionLiveOuts.buildLiveRegMap();
}
+#ifdef DUMP_MAX_REG_PRESSURE
+ if (PrintMaxRPRegUsageBeforeScheduler) {
+ dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
+ dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
+ LIS->dump();
+ }
+#endif
+
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
while (S.advanceStage()) {
auto Stage = createSchedStage(S.getCurrentStage());
@@ -995,6 +1018,14 @@ void GCNScheduleDAGMILive::runSchedStages() {
Stage->finalizeGCNSchedStage();
}
+
+#ifdef DUMP_MAX_REG_PRESSURE
+ if (PrintMaxRPRegUsageAfterScheduler) {
+ dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
+ dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
+ LIS->dump();
+ }
+#endif
}
#ifndef NDEBUG
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 64e34db..5f6d742 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -260,8 +260,12 @@ class NSAHelper {
}
class MIMGNSAHelper<int num_addrs,
- list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)>
- : NSAHelper<> {
+ list<RegisterOperand> addr_types_in=[]>
+ : NSAHelper<> {
+ list<RegisterOperand> addr_types =
+ !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs),
+ addr_types_in);
+
list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i);
let AddrIns = !dag(ins, addr_types, AddrAsmNames);
let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
@@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = "">
// Base class for all NSA MIMG instructions.
// Note that 1-dword addresses always use non-NSA variants.
class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[],
+ list<RegisterOperand> addr_types=[],
RegisterOperand LastAddrRC = VGPROp_32>
: MIMG<outs, dns>, MIMGe_gfx11<op> {
let SubtargetPredicate = isGFX11Only;
@@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
}
class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[]>
+ list<RegisterOperand> addr_types=[]>
: VIMAGE<outs, dns>, VIMAGEe<op> {
let SubtargetPredicate = isGFX12Plus;
let AssemblerPredicate = isGFX12Plus;
@@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
int VAddrDwords = !srl(Size, 5);
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
- RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
- list<RegisterClass> GFX11PlusAddrTypes =
- !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
- isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
- IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96],
- true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
+ RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32);
+ list<RegisterOperand> GFX11PlusAddrTypes =
+ !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32],
+ isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64],
+ IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96],
+ true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]);
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC>
@@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC>
}
class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
- list<RegisterClass> addr_types>
+ list<RegisterOperand> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11",
addr_types> {
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
@@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
bit isDual, bit isBVH8,
- list<RegisterClass> addr_types>
+ list<RegisterOperand> addr_types>
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5e27b37..6dcbced 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1019,7 +1019,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
// SMEM and VMEM operations. So there will never be
// outstanding address translations for both SMEM and
// VMEM at the same time.
- setScoreLB(T, CurrScore - 1);
+ setScoreLB(T, getScoreUB(T) - 1);
PendingEvents &= ~(1 << OtherEvent);
}
for (const MachineOperand &Op : Inst.all_uses())
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index eac9fd4..27e5ee9c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3726,6 +3726,23 @@ def : GCNPat <
} // End foreach Ty = ...
} // End AddedComplexity = 1
+let True16Predicate = UseRealTrue16Insts in {
+def : GCNPat<
+ (i32 (DivergentBinFrag<or>
+ (i32 (zext i16:$src_lo)),
+ (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi)))))
+ )),
+ (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16)
+>;
+def : GCNPat<
+ (i32 (DivergentBinFrag<or>
+ (i32 (bitconvert (v2i16 (build_vector (i16 0), (i16 VGPR_16:$src_hi))))),
+ (i32 (zext i16:$src_lo))
+ )),
+ (REG_SEQUENCE VGPR_32, $src_lo, lo16, $src_hi, hi16)
+>;
+}
+
let True16Predicate = UseRealTrue16Insts in
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index be1c883..ebd2e7e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2356,7 +2356,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
AMDGPU::M0)
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case AMDGPU::SI_SPILL_V1024_SAVE:
case AMDGPU::SI_SPILL_V512_SAVE:
@@ -2446,7 +2446,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
AMDGPU::M0)
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case AMDGPU::SI_SPILL_V16_RESTORE:
case AMDGPU::SI_SPILL_V32_RESTORE:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index bef4868..7e7ee75 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -280,6 +280,10 @@ static unsigned getTcgen05LdOpcode(unsigned IID, bool enablePack) {
}
void NVPTXDAGToDAGISel::SelectTcgen05Ld(SDNode *N, bool hasOffset) {
+ if (!Subtarget->hasTcgen05InstSupport())
+ report_fatal_error(
+ "tcgen05.ld is not supported on this architecture variant");
+
SDLoc DL(N);
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -2136,6 +2140,10 @@ static unsigned getTcgen05StOpcode(unsigned IID, bool enableUnpack) {
}
void NVPTXDAGToDAGISel::SelectTcgen05St(SDNode *N, bool hasOffset) {
+ if (!Subtarget->hasTcgen05InstSupport())
+ report_fatal_error(
+ "tcgen05.st is not supported on this architecture variant");
+
SDLoc DL(N);
unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a1fb665..272c21f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -233,7 +233,7 @@ getVectorLoweringShape(EVT VectorEVT, const NVPTXSubtarget &STI,
// target supports 256-bit loads/stores
if (!CanLowerTo256Bit)
return std::nullopt;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case MVT::v2i8:
case MVT::v2i64:
case MVT::v2f64:
@@ -248,7 +248,7 @@ getVectorLoweringShape(EVT VectorEVT, const NVPTXSubtarget &STI,
// global and the target supports 256-bit loads/stores.
if (!CanLowerTo256Bit)
return std::nullopt;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case MVT::v2i16: // <1 x i16x2>
case MVT::v2f16: // <1 x f16x2>
case MVT::v2bf16: // <1 x bf16x2>
@@ -270,7 +270,7 @@ getVectorLoweringShape(EVT VectorEVT, const NVPTXSubtarget &STI,
// target supports 256-bit loads/stores
if (!CanLowerTo256Bit)
return std::nullopt;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case MVT::v2f32: // <1 x f32x2>
case MVT::v4f32: // <2 x f32x2>
case MVT::v2i32: // <1 x i32x2>
@@ -6749,7 +6749,7 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::BinOp::Xchg:
if (BitWidth == 128)
return AtomicExpansionKind::None;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case AtomicRMWInst::BinOp::And:
case AtomicRMWInst::BinOp::Or:
case AtomicRMWInst::BinOp::Xor:
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6c14cf0..dfde0cc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -101,6 +101,22 @@ def PrmtMode : Operand<i32> {
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//
+// Checks PTX version and family-specific and architecture-specific SM versions.
+// For example, sm_100{f/a} and any future variants in the same family will match
+// for any PTX version greater than or equal to `PTXVersion`.
+class PTXWithFamilySMs<int PTXVersion, list<int> SMVersions> :
+ Predicate<"Subtarget->hasPTXWithFamilySMs(" # PTXVersion # ", {" #
+ !interleave(SMVersions, ", ") # "})">;
+
+// Checks PTX version and architecture-specific SM versions.
+// For example, sm_100{a} will match for any PTX version
+// greater than or equal to `PTXVersion`.
+class PTXWithAccelSMs<int PTXVersion, list<int> SMVersions> :
+ Predicate<"Subtarget->hasPTXWithAccelSMs(" # PTXVersion # ", {" #
+ !interleave(SMVersions, ", ") # "})">;
+
+// Helper predicate to call a subtarget method.
+class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # SubtargetMethod # "()">;
def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index a8b854f..22cf3a7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -5103,8 +5103,8 @@ let Predicates = [hasSM<90>, hasPTX<78>] in {
def EXIT : NullaryInst<"exit", int_nvvm_exit>;
// Tcgen05 intrinsics
-let isConvergent = true, Predicates = [hasTcgen05Instructions] in {
-
+let isConvergent = true in {
+let Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> {
def "" : BasicNVPTXInst<(outs),
(ins ADDR:$dst, B32:$ncols),
@@ -5156,15 +5156,6 @@ defm TCGEN05_COMMIT_CG2 : TCGEN05_COMMIT_INTR<"", "2">;
defm TCGEN05_COMMIT_S64_CG1 : TCGEN05_COMMIT_INTR<"shared", "1">;
defm TCGEN05_COMMIT_S64_CG2 : TCGEN05_COMMIT_INTR<"shared", "2">;
-multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
- def "" : BasicNVPTXInst<(outs),
- (ins ADDR:$tmem_addr),
- "tcgen05.shift.cta_group::" # num # ".down",
- [(Intr addr:$tmem_addr)]>;
-}
-defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
-defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
-
multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> {
defvar dst_fmt = !if(!eq(src_fmt, ""), "", ".b8x16");
defvar fmt_asm = StrJoin<".", [dst_fmt, src_fmt]>.ret;
@@ -5195,9 +5186,22 @@ foreach src_fmt = ["", "b6x16_p32", "b4x16_p64"] in {
defm TCGEN05_CP_64x128_2 # src_fmt : TCGEN05_CP_INTR<"64x128b", src_fmt, "warpx2::01_23">;
defm TCGEN05_CP_32x128 # src_fmt : TCGEN05_CP_INTR<"32x128b", src_fmt, "warpx4">;
}
+} // Predicates
+
+let Predicates = [callSubtarget<"hasTcgen05ShiftSupport">] in {
+multiclass TCGEN05_SHIFT_INTR<string num, Intrinsic Intr> {
+ def "" : BasicNVPTXInst<(outs),
+ (ins ADDR:$tmem_addr),
+ "tcgen05.shift.cta_group::" # num # ".down",
+ [(Intr addr:$tmem_addr)]>;
+}
+defm TCGEN05_SHIFT_CG1: TCGEN05_SHIFT_INTR<"1", int_nvvm_tcgen05_shift_down_cg1>;
+defm TCGEN05_SHIFT_CG2: TCGEN05_SHIFT_INTR<"2", int_nvvm_tcgen05_shift_down_cg2>;
+} // Predicates
+
} // isConvergent
-let hasSideEffects = 1, Predicates = [hasTcgen05Instructions] in {
+let hasSideEffects = 1, Predicates = [callSubtarget<"hasTcgen05InstSupport">] in {
def tcgen05_fence_before_thread_sync: NullaryInst<
"tcgen05.fence::before_thread_sync", int_nvvm_tcgen05_fence_before_thread_sync>;
@@ -5231,8 +5235,7 @@ class TCGEN05_LDST_REGINFO<int Veclen> {
//
class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
- NVPTXInst<(outs), (ins), "?", []>,
- Requires<[hasTcgen05Instructions]> {
+ NVPTXInst<(outs), (ins), "?", []> {
TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
@@ -5256,8 +5259,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
//
class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> :
- NVPTXInst<(outs), (ins), "?", []>,
- Requires<[hasTcgen05Instructions]> {
+ NVPTXInst<(outs), (ins), "?", []> {
TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index c548967..989be50 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -72,6 +72,40 @@ const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
return TSInfo.get();
}
+bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
+ ArrayRef<unsigned> SMVersions) const {
+ unsigned PTXVer = getPTXVersion();
+ if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion)
+ return false;
+
+ unsigned SMVer = getSmVersion();
+ return llvm::any_of(SMVersions, [&](unsigned SM) {
+ // sm_101 is a different family, never group it with sm_10x.
+ if (SMVer == 101 || SM == 101)
+ return SMVer == SM &&
+ // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
+ // supported.
+ !(PTXVer >= 90 && SMVer == 101);
+
+ return getSmFamilyVersion() == SM / 10 && SMVer >= SM;
+ });
+}
+
+bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
+ ArrayRef<unsigned> SMVersions) const {
+ unsigned PTXVer = getPTXVersion();
+ if (!hasArchAccelFeatures() || PTXVer < PTXVersion)
+ return false;
+
+ unsigned SMVer = getSmVersion();
+ return llvm::any_of(SMVersions, [&](unsigned SM) {
+ return SMVer == SM &&
+ // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
+ // supported.
+ !(PTXVer >= 90 && SMVer == 101);
+ });
+}
+
bool NVPTXSubtarget::allowFP16Math() const {
return hasFP16Math() && NoF16Math == false;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e81c56b..194dbdc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -73,6 +73,18 @@ public:
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
+ // Checks PTX version and family-specific and architecture-specific SM
+ // versions. For example, sm_100{f/a} and any future variants in the same
+ // family will match for any PTX version greater than or equal to
+ // `PTXVersion`.
+ bool hasPTXWithFamilySMs(unsigned PTXVersion,
+ ArrayRef<unsigned> SMVersions) const;
+ // Checks PTX version and architecture-specific SM versions.
+ // For example, sm_100{a} will match for any PTX version greater than or equal
+ // to `PTXVersion`.
+ bool hasPTXWithAccelSMs(unsigned PTXVersion,
+ ArrayRef<unsigned> SMVersions) const;
+
bool has256BitVectorLoadStore(unsigned AS) const {
return SmVersion >= 100 && PTXVersion >= 88 &&
AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
@@ -127,6 +139,27 @@ public:
return HasTcgen05 && PTXVersion >= MinPTXVersion;
}
+ // Checks following instructions support:
+ // - tcgen05.ld/st
+ // - tcgen05.alloc/dealloc/relinquish
+ // - tcgen05.cp
+ // - tcgen05.fence/wait
+ // - tcgen05.commit
+ bool hasTcgen05InstSupport() const {
+ // sm_101 renamed to sm_110 in PTX 9.0
+ return hasPTXWithFamilySMs(90, {100, 110}) ||
+ hasPTXWithFamilySMs(88, {100, 101}) ||
+ hasPTXWithAccelSMs(86, {100, 101});
+ }
+
+ // Checks tcgen05.shift instruction support.
+ bool hasTcgen05ShiftSupport() const {
+ // sm_101 renamed to sm_110 in PTX 9.0
+ return hasPTXWithAccelSMs(90, {100, 110, 103}) ||
+ hasPTXWithAccelSMs(88, {100, 101, 103}) ||
+ hasPTXWithAccelSMs(86, {100, 101});
+ }
+
bool hasTcgen05MMAScaleInputDImm() const {
return FullSmVersion == 1003 && PTXVersion >= 86;
}
@@ -158,6 +191,7 @@ public:
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
+ unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; }
// GPUs with "a" suffix have architecture-accelerated features that are
// supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasArchAccelFeatures() allows distinguishing
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 21dbb7c..8851a0f 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1659,6 +1659,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(
Operands, ErrorInfo, -1, (1 << 5) - 1,
"immediate must be non-zero in the range");
+ case Match_InvalidXSfmmVType: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return generateXSfmmVTypeError(ErrorLoc);
+ }
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return generateVTypeError(ErrorLoc);
@@ -1688,7 +1692,7 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
(1 << 25) - 1);
// HACK: See comment before `BareSymbolQC_E_LI` in RISCVInstrInfoXqci.td.
case Match_InvalidBareSymbolQC_E_LI:
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
// END HACK
case Match_InvalidBareSImm32:
return generateImmOutOfRangeError(Operands, ErrorInfo,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 70b7c43..e75dfe3 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -142,6 +142,22 @@ enum {
ReadsPastVLShift = DestEEWShift + 2,
ReadsPastVLMask = 1ULL << ReadsPastVLShift,
+
+ // 0 -> Don't care about altfmt bit in VTYPE.
+ // 1 -> Is not altfmt.
+ // 2 -> Is altfmt(BF16).
+ AltFmtTypeShift = ReadsPastVLShift + 1,
+ AltFmtTypeMask = 3ULL << AltFmtTypeShift,
+
+ // XSfmmbase
+ HasTWidenOpShift = AltFmtTypeShift + 2,
+ HasTWidenOpMask = 1ULL << HasTWidenOpShift,
+
+ HasTMOpShift = HasTWidenOpShift + 1,
+ HasTMOpMask = 1ULL << HasTMOpShift,
+
+ HasTKOpShift = HasTMOpShift + 1,
+ HasTKOpMask = 1ULL << HasTKOpShift,
};
// Helper functions to read TSFlags.
@@ -183,6 +199,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) {
return TSFlags & HasRoundModeOpMask;
}
+enum class AltFmtType { DontCare, NotAltFmt, AltFmt };
+static inline AltFmtType getAltFmtType(uint64_t TSFlags) {
+ return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift);
+}
+
/// \returns true if this instruction uses vxrm
static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; }
@@ -204,11 +225,47 @@ static inline bool readsPastVL(uint64_t TSFlags) {
return TSFlags & ReadsPastVLMask;
}
+// XSfmmbase
+static inline bool hasTWidenOp(uint64_t TSFlags) {
+ return TSFlags & HasTWidenOpMask;
+}
+
+static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; }
+
+static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; }
+
+static inline unsigned getTNOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags));
+ unsigned Offset = 3;
+ if (hasTKOp(TSFlags))
+ Offset = 4;
+ return Desc.getNumOperands() - Offset;
+}
+
+static inline unsigned getTMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags));
+ if (hasTKOp(TSFlags))
+ return Desc.getNumOperands() - 5;
+ // vtzero.t
+ return Desc.getNumOperands() - 4;
+}
+
+static inline unsigned getTKOpNum(const MCInstrDesc &Desc) {
+ [[maybe_unused]] const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags));
+ return Desc.getNumOperands() - 3;
+}
+
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
// instructions with VL also have SEW.
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
+ // In Xsfmmbase, TN is an alias for VL, so here we use the same TSFlags bit.
+ if (hasTWidenOp(TSFlags))
+ return getTNOpNum(Desc);
unsigned Offset = 2;
if (hasVecPolicyOp(TSFlags))
Offset = 3;
@@ -226,7 +283,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
assert(hasSEWOp(TSFlags));
unsigned Offset = 1;
- if (hasVecPolicyOp(TSFlags))
+ if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags))
Offset = 2;
return Desc.getNumOperands() - Offset;
}
@@ -243,6 +300,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) {
if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags))
return -1;
+ if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags))
+ return getTMOpNum(Desc) - 1;
+
// The operand order
// --------------------------------------
// | n-1 (if any) | n-2 | n-3 | n-4 |
@@ -385,7 +445,9 @@ enum OperandType : unsigned {
OPERAND_SEW_MASK,
// Vector rounding mode for VXRM or FRM.
OPERAND_VEC_RM,
- OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM,
+ // Vtype operand for XSfmm extension.
+ OPERAND_XSFMM_VTYPE,
+ OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE,
// Operand is either a register or uimm5, this is used by V extension pseudo
// instructions to represent a value that be passed as AVL to either vsetvli
// or vsetivli.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index cf8d120..9ed3b97 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -168,10 +168,13 @@ struct DemandedFields {
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
// vill is unset.
bool VILL = false;
+ bool UseTWiden = false;
+ bool UseAltFmt = false;
// Return true if any part of VTYPE was used
bool usedVTYPE() const {
- return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
+ return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL ||
+ UseTWiden || UseAltFmt;
}
// Return true if any property of VL was used
@@ -187,6 +190,8 @@ struct DemandedFields {
TailPolicy = true;
MaskPolicy = true;
VILL = true;
+ UseTWiden = true;
+ UseAltFmt = true;
}
// Mark all VL properties as demanded
@@ -212,6 +217,8 @@ struct DemandedFields {
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
VILL |= B.VILL;
+ UseAltFmt |= B.UseAltFmt;
+ UseTWiden |= B.UseTWiden;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -258,7 +265,9 @@ struct DemandedFields {
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy << ", ";
- OS << "VILL=" << VILL;
+ OS << "VILL=" << VILL << ", ";
+ OS << "UseAltFmt=" << UseAltFmt << ", ";
+ OS << "UseTWiden=" << UseTWiden;
OS << "}";
}
#endif
@@ -328,6 +337,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
RISCVVType::isMaskAgnostic(NewVType))
return false;
+ if (Used.UseTWiden && (RISCVVType::hasXSfmmWiden(CurVType) !=
+ RISCVVType::hasXSfmmWiden(NewVType) ||
+ (RISCVVType::hasXSfmmWiden(CurVType) &&
+ RISCVVType::getXSfmmWiden(CurVType) !=
+ RISCVVType::getXSfmmWiden(NewVType))))
+ return false;
+ if (Used.UseAltFmt &&
+ RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType))
+ return false;
return true;
}
@@ -479,6 +497,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.TailPolicy = false;
}
+ Res.UseAltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) !=
+ RISCVII::AltFmtType::DontCare;
+ Res.UseTWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI);
+
return Res;
}
@@ -510,6 +533,8 @@ class VSETVLIInfo {
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t SEWLMULRatioOnly : 1;
+ uint8_t AltFmt : 1;
+ uint8_t TWiden : 3;
public:
VSETVLIInfo()
@@ -586,6 +611,8 @@ public:
RISCVVType::VLMUL getVLMUL() const { return VLMul; }
bool getTailAgnostic() const { return TailAgnostic; }
bool getMaskAgnostic() const { return MaskAgnostic; }
+ bool getAltFmt() const { return AltFmt; }
+ unsigned getTWiden() const { return TWiden; }
bool hasNonZeroAVL(const LiveIntervals *LIS) const {
if (hasAVLImm())
@@ -647,21 +674,31 @@ public:
SEW = RISCVVType::getSEW(VType);
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+ AltFmt = RISCVVType::isAltFmt(VType);
+ TWiden =
+ RISCVVType::hasXSfmmWiden(VType) ? RISCVVType::getXSfmmWiden(VType) : 0;
}
- void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
+ void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt,
+ unsigned W) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
SEW = S;
TailAgnostic = TA;
MaskAgnostic = MA;
+ AltFmt = Altfmt;
+ TWiden = W;
}
+ void setAltFmt(bool AF) { AltFmt = AF; }
+
void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
+ if (TWiden != 0)
+ return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
}
@@ -674,9 +711,9 @@ public:
"Can't compare VTYPE in unknown state");
assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
"Can't compare when only LMUL/SEW ratio is valid.");
- return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+ return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) ==
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
- Other.MaskAgnostic);
+ Other.MaskAgnostic, Other.AltFmt, Other.TWiden);
}
unsigned getSEWLMULRatio() const {
@@ -825,7 +862,9 @@ public:
<< "SEW=e" << (unsigned)SEW << ", "
<< "TailAgnostic=" << (bool)TailAgnostic << ", "
<< "MaskAgnostic=" << (bool)MaskAgnostic << ", "
- << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
+ << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", "
+ << "TWiden=" << (unsigned)TWiden << ", "
+ << "AltFmt=" << (bool)AltFmt << "}";
}
#endif
};
@@ -853,6 +892,11 @@ struct BlockData {
BlockData() = default;
};
+enum TKTMMode {
+ VSETTK = 0,
+ VSETTM = 1,
+};
+
class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
@@ -908,6 +952,7 @@ private:
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
+ bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const;
};
} // end anonymous namespace
@@ -945,6 +990,18 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else if (RISCVInstrInfo::isXSfmmVectorConfigTNInstr(MI)) {
+ assert(MI.getOpcode() == RISCV::PseudoSF_VSETTNT ||
+ MI.getOpcode() == RISCV::PseudoSF_VSETTNTX0);
+ switch (MI.getOpcode()) {
+ case RISCV::PseudoSF_VSETTNTX0:
+ NewInfo.setAVLVLMAX();
+ break;
+ case RISCV::PseudoSF_VSETTNT:
+ Register ATNReg = MI.getOperand(1).getReg();
+ NewInfo.setAVLRegDef(getVNInfoFromReg(ATNReg, MI, LIS), ATNReg);
+ break;
+ }
} else {
assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLIX0);
@@ -1005,11 +1062,34 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+ bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt;
+ InstrInfo.setAltFmt(AltFmt);
+
unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
// A Log2SEW of 0 is an operation on mask registers only.
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+ if (RISCVII::hasTWidenOp(TSFlags)) {
+ const MachineOperand &TWidenOp =
+ MI.getOperand(MI.getNumExplicitOperands() - 1);
+ unsigned TWiden = TWidenOp.getImm();
+
+ InstrInfo.setAVLVLMAX();
+ if (RISCVII::hasVLOp(TSFlags)) {
+ const MachineOperand &TNOp =
+ MI.getOperand(RISCVII::getTNOpNum(MI.getDesc()));
+
+ if (TNOp.getReg().isVirtual())
+ InstrInfo.setAVLRegDef(getVNInfoFromReg(TNOp.getReg(), MI, LIS),
+ TNOp.getReg());
+ }
+
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden);
+
+ return InstrInfo;
+ }
+
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isImm()) {
@@ -1045,7 +1125,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
}
#endif
- InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ // TODO: Propagate the twiden from previous vtype for potential reuse.
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt,
+ /*TWiden*/ 0);
forwardVSETVLIAVL(InstrInfo);
@@ -1053,10 +1135,33 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertPt, DebugLoc DL,
- const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
-
+ MachineBasicBlock::iterator InsertPt,
+ DebugLoc DL, const VSETVLIInfo &Info,
+ const VSETVLIInfo &PrevInfo) {
++NumInsertedVSETVL;
+
+ if (Info.getTWiden()) {
+ if (Info.hasAVLVLMAX()) {
+ Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0))
+ .addReg(DestReg, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE());
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->createAndComputeVirtRegInterval(DestReg);
+ }
+ } else {
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(Info.getAVLReg())
+ .addImm(Info.encodeVTYPE());
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MI);
+ }
+ return;
+ }
+
if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
@@ -1198,7 +1303,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
// be coalesced into another vsetvli since we won't demand any fields.
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
NewInfo.setAVLImm(1);
- NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
+ NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true,
+ /*AltFmt*/ false, /*W*/ 0);
Info = NewInfo;
return;
}
@@ -1240,7 +1346,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
(Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
IncomingInfo.getTailAgnostic(),
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
- IncomingInfo.getMaskAgnostic());
+ IncomingInfo.getMaskAgnostic(),
+ (Demanded.UseAltFmt ? IncomingInfo : Info).getAltFmt(),
+ Demanded.UseTWiden ? IncomingInfo.getTWiden() : 0);
// If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
// the AVL.
@@ -1293,7 +1401,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
- isVectorCopy(ST->getRegisterInfo(), MI))
+ isVectorCopy(ST->getRegisterInfo(), MI) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
HadVectorOp = true;
transferAfter(Info, MI);
@@ -1675,6 +1784,12 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
};
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+ // TODO: Support XSfmm.
+ if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) {
+ NextMI = nullptr;
+ continue;
+ }
if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
Used.doUnion(getDemanded(MI, ST));
@@ -1788,6 +1903,65 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
}
}
+bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
+ TKTMMode Mode) const {
+
+ bool Changed = false;
+ for (auto &MI : MBB) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) ||
+ !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags))
+ continue;
+
+ VSETVLIInfo CurrInfo = computeInfoForInstr(MI);
+
+ if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags))
+ continue;
+
+ if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags))
+ continue;
+
+ unsigned OpNum = 0;
+ unsigned Opcode = 0;
+ switch (Mode) {
+ case VSETTK:
+ OpNum = RISCVII::getTKOpNum(MI.getDesc());
+ Opcode = RISCV::PseudoSF_VSETTK;
+ break;
+ case VSETTM:
+ OpNum = RISCVII::getTMOpNum(MI.getDesc());
+ Opcode = RISCV::PseudoSF_VSETTM;
+ break;
+ }
+
+ assert(OpNum && Opcode && "Invalid OpNum or Opcode");
+
+ MachineOperand &Op = MI.getOperand(OpNum);
+
+ auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(Op.getReg())
+ .addImm(Log2_32(CurrInfo.getSEW()))
+ .addImm(Log2_32(CurrInfo.getTWiden()) + 1);
+
+ Changed = true;
+ Register Reg = Op.getReg();
+ Op.setReg(Register());
+ Op.setIsKill(false);
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*TmpMI);
+ LiveInterval &LI = LIS->getInterval(Reg);
+
+ // Erase the AVL operand from the instruction.
+ LIS->shrinkToUses(&LI);
+ // TODO: Enable this once needVSETVLIPHI is supported.
+ // SmallVector<LiveInterval *> SplitLIs;
+ // LIS->splitSeparateComponents(LI, SplitLIs);
+ }
+ }
+ return Changed;
+}
+
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
@@ -1865,6 +2039,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
insertReadVL(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ insertVSETMTK(MBB, VSETTM);
+ insertVSETMTK(MBB, VSETTK);
+ }
+
BlockInfo.clear();
return HaveVectorOp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 2afd77a..5b06303 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -267,6 +267,22 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
// operands' VLs.
bit ReadsPastVL = 0;
let TSFlags{26} = ReadsPastVL;
+
+ // 0 -> Don't care about altfmt bit in VTYPE.
+ // 1 -> Is not altfmt.
+ // 2 -> Is altfmt(BF16).
+ bits<2> AltFmtType = 0;
+ let TSFlags{28-27} = AltFmtType;
+
+ // XSfmmbase
+ bit HasTWidenOp = 0;
+ let TSFlags{29} = HasTWidenOp;
+
+ bit HasTmOp = 0;
+ let TSFlags{30} = HasTmOp;
+
+ bit HasTkOp = 0;
+ let TSFlags{31} = HasTkOp;
}
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 96e1078..ddb53a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3005,6 +3005,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
else
Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
break;
+ case RISCVOp::OPERAND_XSFMM_VTYPE:
+ Ok = RISCVVType::isValidXSfmmVType(Imm);
+ break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
@@ -3670,6 +3673,11 @@ std::string RISCVInstrInfo::createMIROperandComment(
RISCVVType::printVType(Imm, OS);
break;
}
+ case RISCVOp::OPERAND_XSFMM_VTYPE: {
+ unsigned Imm = Op.getImm();
+ RISCVVType::printXSfmmVType(Imm, OS);
+ break;
+ }
case RISCVOp::OPERAND_SEW:
case RISCVOp::OPERAND_SEW_MASK: {
unsigned Log2SEW = Op.getImm();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 298d35a..c1b23af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -128,6 +128,9 @@ defvar TAIL_AGNOSTIC = 1;
defvar TU_MU = 0;
defvar TA_MU = 1;
defvar TA_MA = 3;
+defvar DONT_CARE_ALTFMT = 0;
+defvar IS_NOT_ALTFMT = 1;
+defvar IS_ALTFMT = 2;
//===----------------------------------------------------------------------===//
// Utilities.
@@ -159,7 +162,8 @@ class PseudoToVInst<string PseudoInst> {
["_M4", ""],
["_M8", ""],
["_SE", ""],
- ["_RM", ""]
+ ["_RM", ""],
+ ["_ALT", ""]
];
string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst,
!subst(AffixSubst[0], AffixSubst[1], Acc));
@@ -6396,7 +6400,7 @@ let Defs = [VXSAT] in {
// 13. Vector Floating-Point Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -6565,7 +6569,7 @@ defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 14. Vector Reduction Operations
@@ -6593,7 +6597,7 @@ defm PseudoVWREDSUM : VPseudoVWRED_VS;
}
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
//===----------------------------------------------------------------------===//
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -6612,7 +6616,7 @@ defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM;
defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM;
}
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 15. Vector Mask Instructions
@@ -6703,7 +6707,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// 16.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach f = FPList in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
@@ -6718,7 +6722,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
}
}
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 16.3. Vector Slide Instructions
@@ -6730,10 +6734,10 @@ let Predicates = [HasVInstructions] in {
defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX;
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">;
defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF;
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 16.4. Vector Register Gather Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 557d873..6a4119a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -438,8 +438,10 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
- defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ let AltFmtType = IS_NOT_ALTFMT in {
+ defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
+ defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ }
}
}
foreach m = MxListW in {
@@ -449,7 +451,8 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach f = FPListW in {
foreach m = f.MxList in
- defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
+ let AltFmtType = IS_NOT_ALTFMT in
+ defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
index a5ee701..5ad22e6b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
@@ -225,7 +225,7 @@ let Predicates = [HasVendorXSfmmbase] in {
def SF_VSETTM : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00001,
"sf.vsettm", "$rd, $rs1">;
def SF_VSETTK : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00010,
- "sf.vsettk", "$rd, $rs1">;
+ "sf.vsettk", "$rd, $rs1">;
def SF_VTDISCARD : SFInstVtDiscard<"sf.vtdiscard">;
def SF_VTMV_V_T : SFInstTileMoveOp<0b010000, (outs VR:$vd), (ins GPR:$rs1),
@@ -277,3 +277,144 @@ let Uses = [FRM], mayRaiseFPException = true in {
} // Predicates = [HasVendorXSfmm32a8f]
} // DecoderNamespace = "XSfvector"
+
+class VPseudoSF_VTileLoad
+ : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileStore
+ : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileMove_V_T
+ : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileMove_T_V
+ : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_MatMul<RegisterClass mtd_class>
+ : RISCVVPseudo<(outs),
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn,
+ AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasTmOp = 1;
+ let HasVLOp = 1; // Tn
+ let HasTkOp = 1;
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_MatMul_FRM<RegisterClass mtd_class>
+ : RISCVVPseudo<(outs),
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm,
+ AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew,
+ ixlenimm:$twiden), []> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasTmOp = 1;
+ let HasVLOp = 1; // Tn
+ let HasTkOp = 1;
+ let HasSEWOp = 1;
+ let HasRoundModeOp = 1;
+ let hasPostISelHook = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let Defs = [VL, VTYPE] in {
+ def PseudoSF_VSETTNT
+ : Pseudo<(outs GPR:$rd),
+ (ins GPRNoX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTNTX0
+ : Pseudo<(outs GPRNoX0:$rd),
+ (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTNTX0X0
+ : Pseudo<(outs GPRX0:$rd),
+ (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+}
+
+let Defs = [VTYPE], Uses = [VTYPE], HasTWidenOp = 1, HasSEWOp = 1 in {
+ def PseudoSF_VSETTM
+ : Pseudo<(outs GPR:$rd),
+ (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>,
+ PseudoInstExpansion<(SF_VSETTM GPR:$rd, GPR:$rs1)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTK
+ : Pseudo<(outs GPR:$rd),
+ (ins GPR:$rs1, ixlenimm:$logwsew, ixlenimm:$twiden), []>,
+ PseudoInstExpansion<(SF_VSETTK GPR:$rd, GPR:$rs1)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+}
+}
+
+foreach eew = [8, 16, 32, 64] in {
+ def PseudoSF_VLTE # eew : VPseudoSF_VTileLoad;
+ def PseudoSF_VSTE # eew : VPseudoSF_VTileStore;
+}
+
+def PseudoSF_VTMV_T_V : VPseudoSF_VTileMove_T_V;
+def PseudoSF_VTMV_V_T : VPseudoSF_VTileMove_V_T;
+
+foreach a = I8Encodes in
+ foreach b = I8Encodes in
+ def PseudoSF_MM_ # !toupper(a.Name) # _ # !toupper(b.Name)
+ : VPseudoSF_MatMul<TRM4>;
+
+let AltFmtType = IS_NOT_ALTFMT in
+ def PseudoSF_MM_F_F : VPseudoSF_MatMul_FRM<TRM2>;
+let AltFmtType = IS_ALTFMT in
+ def PseudoSF_MM_F_F_ALT : VPseudoSF_MatMul_FRM<TRM2>;
+
+foreach e1 = [5, 4] in
+ foreach e2 = [5, 4] in
+ def PseudoSF_MM_E # e1 # M # !sub(7, e1) # _E # e2 # M # !sub(7, e2)
+ : VPseudoSF_MatMul_FRM<TRM4>;
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+ let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in
+ def PseudoSF_VTZERO_T
+ : RISCVVPseudo<(outs),
+ (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)>;
+ def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 3658817..dcae977 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -78,7 +78,41 @@ def isVectorConfigInstr
PseudoVSETVLI,
PseudoVSETVLIX0,
PseudoVSETVLIX0X0,
- PseudoVSETIVLI
+ PseudoVSETIVLI,
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0
+ ]>>>;
+
+// Returns true if this is a PseudoSF_VSETTNT* instructions.
+def isXSfmmVectorConfigTNInstr
+ : TIIPredicate<"isXSfmmVectorConfigTNInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0
+ ]>>>;
+
+// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK.
+def isXSfmmVectorConfigTMTKInstr
+ : TIIPredicate<"isXSfmmVectorConfigTMTKInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTM,
+ PseudoSF_VSETTK
+ ]>>>;
+
+// Returns true if this is a XSfmm vector configuration instruction.
+def isXSfmmVectorConfigInstr
+ : TIIPredicate<"isXSfmmVectorConfigInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0,
+ PseudoSF_VSETTM,
+ PseudoSF_VSETTK
]>>>;
// Return true if this is 'vsetvli x0, x0, vtype' which preserves
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 40b6416..e9f43b9 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -178,6 +178,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Shadow stack pointer.
markSuperRegs(Reserved, RISCV::SSP);
+ // XSfmmbase
+ for (MCPhysReg Reg = RISCV::T0; Reg <= RISCV::T15; Reg++)
+ markSuperRegs(Reserved, Reg);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6472334..47c24fc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMA, MVT::v8f16, Legal);
+ }
+
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
+ }
+
// Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
@@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
+ if (!Subtarget->hasFP16() || !VT.isVector())
+ return false;
+
+ EVT ScalarVT = VT.getScalarType();
+ if (!ScalarVT.isSimple())
+ return false;
+
+ return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
+}
+
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const {
// ISel process runs DAGCombiner after legalization; this step is called
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b33a853..472ec67 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -81,6 +81,8 @@ private:
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78b..0f6e1ca 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U :
defm EXTMUL_HIGH_U :
SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
+// Pattern for i32x4.dot_i16x8_s
+def : Pat<
+ (v4i32 (add
+ (wasm_shuffle
+ (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
+ (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
+ (i32 0), (i32 1), (i32 2), (i32 3),
+ (i32 8), (i32 9), (i32 10), (i32 11),
+ (i32 16), (i32 17), (i32 18), (i32 19),
+ (i32 24), (i32 25), (i32 26), (i32 27)),
+ (wasm_shuffle
+ (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
+ (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
+ (i32 4), (i32 5), (i32 6), (i32 7),
+ (i32 12), (i32 13), (i32 14), (i32 15),
+ (i32 20), (i32 21), (i32 22), (i32 23),
+ (i32 28), (i32 29), (i32 30), (i32 31)))
+ ),
+ (v4i32 (DOT v8i16:$lhs, v8i16:$rhs))
+>;
+
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//
@@ -1626,7 +1647,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
// Relaxed (Negative) Multiply-Add (madd/nmadd)
//===----------------------------------------------------------------------===//
-multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
+multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
defm MADD_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1640,16 +1662,46 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
}
-defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
+defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
+
+//===----------------------------------------------------------------------===//
+// FP16 (Negative) Multiply-Add (madd/nmadd)
+//===----------------------------------------------------------------------===//
+
+multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
+ defm MADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".madd\t$dst, $a, $b, $c",
+ vec.prefix#".madd", simdopA, reqs>;
+ defm NMADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".nmadd\t$dst, $a, $b, $c",
+ vec.prefix#".nmadd", simdopS, reqs>;
+}
+defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+
+// TODO: I think separate intrinsics should be introduced for these FP16 operations.
+def : Pat<(v8f16 (int_wasm_relaxed_madd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))),
+ (MADD_F16x8 V128:$a, V128:$b, V128:$c)>;
+def : Pat<(v8f16 (int_wasm_relaxed_nmadd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))),
+ (NMADD_F16x8 V128:$a, V128:$b, V128:$c)>;
//===----------------------------------------------------------------------===//
// Laneselect
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
index 080a9c0..4e73070 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
@@ -84,11 +84,11 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits,
case Xtensa::CCOMPARE0:
if (FeatureBits[Xtensa::FeatureTimers1])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::CCOMPARE1:
if (FeatureBits[Xtensa::FeatureTimers2])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::CCOMPARE2:
if (FeatureBits[Xtensa::FeatureTimers3])
return true;
@@ -107,37 +107,37 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits,
case Xtensa::EXCSAVE1:
case Xtensa::EXCVADDR:
return FeatureBits[Xtensa::FeatureException];
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::EPC2:
case Xtensa::EPS2:
case Xtensa::EXCSAVE2:
if (FeatureBits[Xtensa::FeatureHighPriInterrupts])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::EPC3:
case Xtensa::EPS3:
case Xtensa::EXCSAVE3:
if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel3])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::EPC4:
case Xtensa::EPS4:
case Xtensa::EXCSAVE4:
if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel4])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::EPC5:
case Xtensa::EPS5:
case Xtensa::EXCSAVE5:
if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel5])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::EPC6:
case Xtensa::EPS6:
case Xtensa::EXCSAVE6:
if (FeatureBits[Xtensa::FeatureHighPriInterruptsLevel6])
return true;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Xtensa::EPC7:
case Xtensa::EPS7:
case Xtensa::EXCSAVE7:
diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp
index acf8e4c..5ea63a9 100644
--- a/llvm/lib/TargetParser/RISCVTargetParser.cpp
+++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp
@@ -228,6 +228,10 @@ void printVType(unsigned VType, raw_ostream &OS) {
OS << ", mu";
}
+void printXSfmmVType(unsigned VType, raw_ostream &OS) {
+ OS << "e" << getSEW(VType) << ", w" << getXSfmmWiden(VType);
+}
+
unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul) {
unsigned LMul;
bool Fractional;
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index cea246e..950bb2b 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -258,7 +258,7 @@ static std::string computePowerDataLayout(const Triple &T) {
static std::string computeAMDDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
- return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ return "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
}
@@ -268,7 +268,7 @@ static std::string computeAMDDataLayout(const Triple &TT) {
// (address space 7), and 128-bit non-integral buffer resourcees (address
// space 8) which cannot be non-trivilally accessed by LLVM memory operations
// like getelementptr.
- return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+ return "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-"
"v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index 26ec4f3..e05fe28 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -1,3 +1,4 @@
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -19,9 +20,7 @@
#include "llvm/Transforms/Coroutines/CoroInstr.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-namespace llvm {
-
-namespace coro {
+namespace llvm::coro {
enum class CloneKind {
/// The shared resume function for a switch lowering.
@@ -149,8 +148,6 @@ public:
}
};
-} // end namespace coro
-
-} // end namespace llvm
+} // end namespace llvm::coro
#endif // LLVM_LIB_TRANSFORMS_COROUTINES_COROCLONER_H
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 471b9eb..cdb5852 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -38,7 +38,7 @@ public:
AnyResumeFnPtrTy(PointerType::getUnqual(Context)) {}
void lowerEarlyIntrinsics(Function &F);
};
-}
+} // namespace
// Replace a direct call to coro.resume or coro.destroy with an indirect call to
// an address returned by coro.subfn.addr intrinsic. This is done so that
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 52f4ffe..cc47a55 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -16,11 +16,7 @@
#include "llvm/Transforms/Coroutines/CoroInstr.h"
#include "llvm/Transforms/Coroutines/CoroShape.h"
-namespace llvm {
-
-class CallGraph;
-
-namespace coro {
+namespace llvm::coro {
bool isSuspendBlock(BasicBlock *BB);
bool declaresAnyIntrinsic(const Module &M);
@@ -61,7 +57,6 @@ void normalizeCoroutine(Function &F, coro::Shape &Shape,
CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
TargetTransformInfo &TTI,
ArrayRef<Value *> Arguments, IRBuilder<> &);
-} // End namespace coro.
-} // End namespace llvm
+} // End namespace llvm::coro
#endif
diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
index 6aaabca..f2444da 100644
--- a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
@@ -137,8 +137,7 @@ struct RematGraph {
} // namespace
-namespace llvm {
-template <> struct GraphTraits<RematGraph *> {
+template <> struct llvm::GraphTraits<RematGraph *> {
using NodeRef = RematGraph::RematNode *;
using ChildIteratorType = RematGraph::RematNode **;
@@ -149,8 +148,6 @@ template <> struct GraphTraits<RematGraph *> {
static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
};
-} // end namespace llvm
-
// For each instruction identified as materializable across the suspend point,
// and its associated DAG of other rematerializable instructions,
// recreate the DAG of instructions after the suspend point.
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
index e474c07..81fe0c9 100644
--- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
@@ -16,11 +16,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-namespace llvm {
-
-namespace coro {
-
-namespace {
+using namespace llvm;
+using namespace llvm::coro;
typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;
@@ -71,7 +68,7 @@ static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
/// This happens during the all-instructions iteration, so it must not
/// delete the call.
static Instruction *
-lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const coro::Shape &Shape,
+lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const Shape &Shape,
SmallVectorImpl<Instruction *> &DeadInsts) {
IRBuilder<> Builder(AI);
auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
@@ -450,10 +447,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape,
Visitor.getMayWriteBeforeCoroBegin());
}
-} // namespace
-
-void collectSpillsFromArgs(SpillInfo &Spills, Function &F,
- const SuspendCrossingInfo &Checker) {
+void coro::collectSpillsFromArgs(SpillInfo &Spills, Function &F,
+ const SuspendCrossingInfo &Checker) {
// Collect the spills for arguments and other not-materializable values.
for (Argument &A : F.args())
for (User *U : A.users())
@@ -461,7 +456,7 @@ void collectSpillsFromArgs(SpillInfo &Spills, Function &F,
Spills[&A].push_back(cast<Instruction>(U));
}
-void collectSpillsAndAllocasFromInsts(
+void coro::collectSpillsAndAllocasFromInsts(
SpillInfo &Spills, SmallVector<AllocaInfo, 8> &Allocas,
SmallVector<Instruction *, 4> &DeadInstructions,
SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F,
@@ -516,8 +511,8 @@ void collectSpillsAndAllocasFromInsts(
}
}
-void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
- const SuspendCrossingInfo &Checker) {
+void coro::collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
+ const SuspendCrossingInfo &Checker) {
// We don't want the layout of coroutine frame to be affected
// by debug information. So we only choose to salvage dbg.values for
// whose value is already in the frame.
@@ -535,10 +530,9 @@ void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
/// Async and Retcon{Once} conventions assume that all spill uses can be sunk
/// after the coro.begin intrinsic.
-void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom,
- CoroBeginInst *CoroBegin,
- coro::SpillInfo &Spills,
- SmallVectorImpl<coro::AllocaInfo> &Allocas) {
+void coro::sinkSpillUsesAfterCoroBegin(
+ const DominatorTree &Dom, CoroBeginInst *CoroBegin, coro::SpillInfo &Spills,
+ SmallVectorImpl<coro::AllocaInfo> &Allocas) {
SmallSetVector<Instruction *, 32> ToMove;
SmallVector<Instruction *, 32> Worklist;
@@ -582,8 +576,9 @@ void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom,
Inst->moveBefore(InsertPt->getIterator());
}
-BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def,
- const DominatorTree &DT) {
+BasicBlock::iterator coro::getSpillInsertionPt(const coro::Shape &Shape,
+ Value *Def,
+ const DominatorTree &DT) {
BasicBlock::iterator InsertPt;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
@@ -625,7 +620,3 @@ BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def,
return InsertPt;
}
-
-} // End namespace coro.
-
-} // End namespace llvm.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 7071876..943c223 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -471,7 +471,6 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
-public:
/// Create `select C, S1, S2`. Use only when the profile cannot be calculated
/// from existing profile metadata: if the Function has profiles, this will
/// set the profile of this select to "unknown".
@@ -484,6 +483,7 @@ public:
return Sel;
}
+public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 63e24a0..a330bb7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -110,8 +110,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
ShrAmt->getName() + ".z");
// There is no existing !prof metadata we can derive the !prof metadata for
// this select.
- Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper);
- IC.Builder.Insert(Select);
+ Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower,
+ Upper, DEBUG_TYPE);
Select->takeName(I);
return Select;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 82ac903..3f11cae 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1690,6 +1690,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
// 2) (fp_binop ({s|u}itofp x), FpC)
// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
+ // Don't perform the fold on vectors, as the integer operation may be much
+ // more expensive than the float operation in that case.
+ if (BO.getType()->isVectorTy())
+ return nullptr;
+
std::array<Value *, 2> IntOps = {nullptr, nullptr};
Constant *Op1FpC = nullptr;
// Check for:
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index c86092b..a6ec6c1 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -194,6 +195,30 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
}
}
+static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
+ AnnotationKind Kind) {
+ assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK &&
+ "Should not handle AnnotationOK here");
+ SmallString<32> Reason;
+ switch (Kind) {
+ case llvm::memprof::AnnotationKind::ExplicitSection:
+ ++NumOfMemProfExplicitSectionGlobalVars;
+ Reason.append("explicit section name");
+ break;
+ case llvm::memprof::AnnotationKind::DeclForLinker:
+ Reason.append("linker declaration");
+ break;
+ case llvm::memprof::AnnotationKind::ReservedName:
+ Reason.append("name starts with `llvm.`");
+ break;
+ default:
+ llvm_unreachable("Unexpected annotation kind");
+ }
+ LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
+ << Reason << ".\n");
+ return;
+}
+
struct AllocMatchInfo {
uint64_t TotalSize = 0;
AllocationType AllocType = AllocationType::None;
@@ -775,29 +800,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
-// Returns true iff the global variable has custom section either by
-// __attribute__((section("name")))
-// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
-// or #pragma clang section directives
-// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
-static bool hasExplicitSectionName(const GlobalVariable &GVar) {
- if (GVar.hasSection())
- return true;
-
- auto Attrs = GVar.getAttributes();
- if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
- Attrs.hasAttribute("relro-section") ||
- Attrs.hasAttribute("rodata-section"))
- return true;
- return false;
-}
-
bool MemProfUsePass::annotateGlobalVariables(
Module &M, const memprof::DataAccessProfData *DataAccessProf) {
if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
return false;
if (!DataAccessProf) {
+ M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
M.getContext().diagnose(DiagnosticInfoPGOProfile(
MemoryProfileFileName.data(),
StringRef("Data access profiles not found in memprof. Ignore "
@@ -805,6 +814,7 @@ bool MemProfUsePass::annotateGlobalVariables(
DS_Warning));
return false;
}
+ M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
bool Changed = false;
// Iterate all global variables in the module and annotate them based on
@@ -815,13 +825,9 @@ bool MemProfUsePass::annotateGlobalVariables(
for (GlobalVariable &GVar : M.globals()) {
assert(!GVar.getSectionPrefix().has_value() &&
"GVar shouldn't have section prefix yet");
- if (GVar.isDeclarationForLinker())
- continue;
-
- if (hasExplicitSectionName(GVar)) {
- ++NumOfMemProfExplicitSectionGlobalVars;
- LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
- << " has explicit section name. Skip annotating.\n");
+ auto Kind = llvm::memprof::getAnnotationKind(GVar);
+ if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) {
+ HandleUnsupportedAnnotationKinds(GVar, Kind);
continue;
}
@@ -831,7 +837,6 @@ bool MemProfUsePass::annotateGlobalVariables(
// TODO: Track string content hash in the profiles and compute it inside the
// compiler to categeorize the hotness string literals.
if (Name.starts_with(".str")) {
-
LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
continue;
}
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 66a2c76..09db464 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2626,7 +2626,7 @@ void ObjCARCOpt::OptimizeAutoreleasePools(Function &F) {
case ARCInstKind::Call:
if (!MayAutorelease(cast<CallBase>(Inst)))
break;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ARCInstKind::Autorelease:
case ARCInstKind::AutoreleaseRV:
case ARCInstKind::FusedRetainAutorelease:
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 56e0569..7cae94eb 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1295,6 +1295,24 @@ public:
return commonAlignment(InitialAlign, ElementSizeInBits / 8);
}
+ IntegerType *getIndexType(Value *Ptr) const {
+ return cast<IntegerType>(DL.getIndexType(Ptr->getType()));
+ }
+
+ Value *getIndex(Value *Ptr, uint64_t V) const {
+ return ConstantInt::get(getIndexType(Ptr), V);
+ }
+
+ Value *castToIndexType(Value *Ptr, Value *V, IRBuilder<> &Builder) const {
+ assert(isa<IntegerType>(V->getType()) &&
+ "Attempted to cast non-integral type to integer index");
+ // In case the data layout's index type differs in width from the type of
+ // the value we're given, truncate or zero extend to the appropriate width.
+ // We zero extend here as indices are unsigned.
+ return Builder.CreateZExtOrTrunc(V, getIndexType(Ptr),
+ V->getName() + ".cast");
+ }
+
/// Load a matrix with \p Shape starting at \p Ptr and using \p Stride between
/// vectors.
MatrixTy loadMatrix(Type *Ty, Value *Ptr, MaybeAlign MAlign, Value *Stride,
@@ -1304,6 +1322,7 @@ public:
Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride());
Value *EltPtr = Ptr;
MatrixTy Result;
+ Stride = castToIndexType(Ptr, Stride, Builder);
for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
Value *GEP = computeVectorAddr(
EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
@@ -1325,14 +1344,14 @@ public:
ShapeInfo ResultShape, Type *EltTy,
IRBuilder<> &Builder) {
Value *Offset = Builder.CreateAdd(
- Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
+ Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I);
Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows *
ResultShape.NumColumns);
return loadMatrix(TileTy, TileStart, Align,
- Builder.getInt64(MatrixShape.getStride()), IsVolatile,
+ getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile,
ResultShape, Builder);
}
@@ -1363,14 +1382,15 @@ public:
MaybeAlign MAlign, bool IsVolatile, ShapeInfo MatrixShape,
Value *I, Value *J, Type *EltTy, IRBuilder<> &Builder) {
Value *Offset = Builder.CreateAdd(
- Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
+ Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I);
Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() *
StoreVal.getNumColumns());
storeMatrix(TileTy, StoreVal, TileStart, MAlign,
- Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder);
+ getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile,
+ Builder);
}
/// Store matrix \p StoreVal starting at \p Ptr and using \p Stride between
@@ -1380,6 +1400,7 @@ public:
IRBuilder<> &Builder) {
auto *VType = cast<FixedVectorType>(Ty);
Value *EltPtr = Ptr;
+ Stride = castToIndexType(Ptr, Stride, Builder);
for (auto Vec : enumerate(StoreVal.vectors())) {
Value *GEP = computeVectorAddr(
EltPtr,
@@ -2011,18 +2032,17 @@ public:
const unsigned TileM = std::min(M - K, unsigned(TileSize));
MatrixTy A =
loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
- LShape, Builder.getInt64(I), Builder.getInt64(K),
+ LShape, getIndex(APtr, I), getIndex(APtr, K),
{TileR, TileM}, EltType, Builder);
MatrixTy B =
loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
- RShape, Builder.getInt64(K), Builder.getInt64(J),
+ RShape, getIndex(BPtr, K), getIndex(BPtr, J),
{TileM, TileC}, EltType, Builder);
emitMatrixMultiply(Res, A, B, Builder, true, false,
getFastMathFlags(MatMul));
}
storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
- Builder.getInt64(I), Builder.getInt64(J), EltType,
- Builder);
+ getIndex(CPtr, I), getIndex(CPtr, J), EltType, Builder);
}
}
@@ -2254,15 +2274,14 @@ public:
/// Lower load instructions.
MatrixTy VisitLoad(LoadInst *Inst, const ShapeInfo &SI, Value *Ptr,
IRBuilder<> &Builder) {
- return LowerLoad(Inst, Ptr, Inst->getAlign(),
- Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
- Builder);
+ return LowerLoad(Inst, Ptr, Inst->getAlign(), getIndex(Ptr, SI.getStride()),
+ Inst->isVolatile(), SI, Builder);
}
MatrixTy VisitStore(StoreInst *Inst, const ShapeInfo &SI, Value *StoredVal,
Value *Ptr, IRBuilder<> &Builder) {
return LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(),
- Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
+ getIndex(Ptr, SI.getStride()), Inst->isVolatile(), SI,
Builder);
}
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index b187208..3ce569f 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
STATISTIC(RemappedAtomMax, "Highest global NextAtomGroup (after mapping)");
void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) {
- auto CurGroup = DL->getAtomGroup();
+ uint64_t CurGroup = DL->getAtomGroup();
if (!CurGroup)
return;
@@ -62,21 +62,20 @@ void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) {
RemappedAtomMax = std::max<uint64_t>(NewGroup, RemappedAtomMax);
}
-namespace {
-void collectDebugInfoFromInstructions(const Function &F,
- DebugInfoFinder &DIFinder) {
+static void collectDebugInfoFromInstructions(const Function &F,
+ DebugInfoFinder &DIFinder) {
const Module *M = F.getParent();
- if (M) {
- // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
- for (const auto &I : instructions(F))
- DIFinder.processInstruction(*M, I);
- }
+ if (!M)
+ return;
+ // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
+ for (const Instruction &I : instructions(F))
+ DIFinder.processInstruction(*M, I);
}
// Create a predicate that matches the metadata that should be identity mapped
// during function cloning.
-MetadataPredicate createIdentityMDPredicate(const Function &F,
- CloneFunctionChangeType Changes) {
+static MetadataPredicate
+createIdentityMDPredicate(const Function &F, CloneFunctionChangeType Changes) {
if (Changes >= CloneFunctionChangeType::DifferentModule)
return [](const Metadata *MD) { return false; };
@@ -107,7 +106,6 @@ MetadataPredicate createIdentityMDPredicate(const Function &F,
return false;
};
}
-} // namespace
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
@@ -213,10 +211,9 @@ void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
const MetadataPredicate *IdentityMD) {
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc.getAllMetadata(MDs);
- for (auto MD : MDs) {
- NewFunc.addMetadata(MD.first,
- *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
- Materializer, IdentityMD));
+ for (const auto &[Kind, MD] : MDs) {
+ NewFunc.addMetadata(Kind, *MapMetadata(MD, VMap, RemapFlag, TypeMapper,
+ Materializer, IdentityMD));
}
}
@@ -235,7 +232,6 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
for (const BasicBlock &BB : OldFunc) {
-
// Create a new basic block and copy instructions into it!
BasicBlock *CBB =
CloneBasicBlock(&BB, VMap, NameSuffix, &NewFunc, CodeInfo);
@@ -321,7 +317,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Cloning is always a Module level operation, since Metadata needs to be
// cloned.
- const auto RemapFlag = RF_None;
+ const RemapFlags RemapFlag = RF_None;
CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper,
Materializer, &IdentityMD);
@@ -346,8 +342,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// visiting the metadata attached to global values, which would allow this
// code to be deleted. Alternatively, perhaps give responsibility for this
// update to CloneFunctionInto's callers.
- auto *NewModule = NewFunc->getParent();
- auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ Module *NewModule = NewFunc->getParent();
+ NamedMDNode *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
// Avoid multiple insertions of the same DICompileUnit to NMD.
SmallPtrSet<const void *, 8> Visited(llvm::from_range, NMD->operands());
@@ -355,7 +351,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// the function (e.g. as instructions' scope).
DebugInfoFinder DIFinder;
collectDebugInfoFromInstructions(*OldFunc, DIFinder);
- for (auto *Unit : DIFinder.compile_units()) {
+ for (DICompileUnit *Unit : DIFinder.compile_units()) {
MDNode *MappedUnit =
MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
if (Visited.insert(MappedUnit).second)
@@ -821,17 +817,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
--PredCount[Pred];
// Figure out how many entries to remove from each PHI.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- ++PredCount[PN->getIncomingBlock(i)];
+ for (BasicBlock *Pred : PN->blocks())
+ ++PredCount[Pred];
// At this point, the excess predecessor entries are positive in the
// map. Loop over all of the PHIs and remove excess predecessor
// entries.
BasicBlock::iterator I = NewBB->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
- for (const auto &PCI : PredCount) {
- BasicBlock *Pred = PCI.first;
- for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
+ for (const auto &[Pred, Count] : PredCount) {
+ for (unsigned _ : llvm::seq<unsigned>(Count))
PN->removeIncomingValue(Pred, false);
}
}
@@ -866,8 +861,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// As phi-nodes have been now remapped, allow incremental simplification of
// newly-cloned instructions.
const DataLayout &DL = NewFunc->getDataLayout();
- for (const auto &BB : *OldFunc) {
- for (const auto &I : BB) {
+ for (const BasicBlock &BB : *OldFunc) {
+ for (const Instruction &I : BB) {
auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I));
if (!NewI)
continue;
@@ -997,8 +992,8 @@ void llvm::CloneAndPruneFunctionInto(
void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
- for (auto *BB : Blocks) {
- for (auto &Inst : *BB) {
+ for (BasicBlock *BB : Blocks) {
+ for (Instruction &Inst : *BB) {
RemapDbgRecordRange(Inst.getModule(), Inst.getDbgRecordRange(), VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
RemapInstruction(&Inst, VMap,
@@ -1151,9 +1146,9 @@ void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
StringRef Ext, LLVMContext &Context) {
MDBuilder MDB(Context);
- for (auto *ScopeList : NoAliasDeclScopes) {
- for (const auto &MDOperand : ScopeList->operands()) {
- if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+ for (MDNode *ScopeList : NoAliasDeclScopes) {
+ for (const MDOperand &MDOp : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
AliasScopeNode SNANode(MD);
std::string Name;
@@ -1177,7 +1172,7 @@ void llvm::adaptNoAliasScopes(Instruction *I,
auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
bool NeedsReplacement = false;
SmallVector<Metadata *, 8> NewScopeList;
- for (const auto &MDOp : ScopeList->operands()) {
+ for (const MDOperand &MDOp : ScopeList->operands()) {
if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
if (auto *NewMD = ClonedScopes.lookup(MD)) {
NewScopeList.push_back(NewMD);
@@ -1193,12 +1188,12 @@ void llvm::adaptNoAliasScopes(Instruction *I,
};
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
- if (auto *NewScopeList = CloneScopeList(Decl->getScopeList()))
+ if (MDNode *NewScopeList = CloneScopeList(Decl->getScopeList()))
Decl->setScopeList(NewScopeList);
auto replaceWhenNeeded = [&](unsigned MD_ID) {
if (const MDNode *CSNoAlias = I->getMetadata(MD_ID))
- if (auto *NewScopeList = CloneScopeList(CSNoAlias))
+ if (MDNode *NewScopeList = CloneScopeList(CSNoAlias))
I->setMetadata(MD_ID, NewScopeList);
};
replaceWhenNeeded(LLVMContext::MD_noalias);
diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index d7bf791..fb39fdd 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
@@ -112,7 +112,7 @@ struct BBValueInfo {
void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
SmallVectorImpl<PHINode *> *InsertedPHIs) {
DenseMap<BasicBlock *, BBValueInfo> BBInfos;
- for (auto &R : Rewrites) {
+ for (RewriteInfo &R : Rewrites) {
BBInfos.clear();
// Compute locations for new phi-nodes.
@@ -145,7 +145,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
BBInfos[BB].LiveOutValue = V;
// We've computed IDF, now insert new phi-nodes there.
- for (auto *FrontierBB : IDFBlocks) {
+ for (BasicBlock *FrontierBB : IDFBlocks) {
IRBuilder<> B(FrontierBB, FrontierBB->begin());
PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name);
BBInfos[FrontierBB].LiveInValue = PN;
@@ -156,7 +156,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
// IsLiveOut indicates whether we are computing live-out values (true) or
// live-in values (false).
auto ComputeValue = [&](BasicBlock *BB, bool IsLiveOut) -> Value * {
- auto *BBInfo = &BBInfos[BB];
+ BBValueInfo *BBInfo = &BBInfos[BB];
if (IsLiveOut && BBInfo->LiveOutValue)
return BBInfo->LiveOutValue;
@@ -187,7 +187,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
if (!V)
V = UndefValue::get(R.Ty);
- for (auto *BBInfo : Stack)
+ for (BBValueInfo *BBInfo : Stack)
// Loop above can insert new entries into the BBInfos map: assume the
// map shouldn't grow due to [1] and BBInfo references are valid.
BBInfo->LiveInValue = V;
@@ -196,7 +196,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
};
// Fill in arguments of the inserted PHIs.
- for (auto *BB : IDFBlocks) {
+ for (BasicBlock *BB : IDFBlocks) {
auto *PHI = cast<PHINode>(&BB->front());
for (BasicBlock *Pred : PredCache.get(BB))
PHI->addIncoming(ComputeValue(Pred, /*IsLiveOut=*/true), Pred);
@@ -222,3 +222,96 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
}
}
}
+
+// Perform a single pass of simplification over the worklist of PHIs.
+// This should be called after RewriteAllUses() because simplifying PHIs
+// immediately after creation would require updating all references to those
+// PHIs in the BBValueInfo structures, which would necessitate additional
+// reference tracking overhead.
+static void simplifyPass(MutableArrayRef<PHINode *> Worklist,
+ const DataLayout &DL) {
+ for (PHINode *&PHI : Worklist) {
+ if (Value *Simplified = simplifyInstruction(PHI, DL)) {
+ PHI->replaceAllUsesWith(Simplified);
+ PHI->eraseFromParent();
+ PHI = nullptr; // Mark as removed.
+ }
+ }
+}
+
+#ifndef NDEBUG // Should this be under EXPENSIVE_CHECKS?
+// New PHI nodes should not reference one another but they may reference
+// themselves or existing PHI nodes, and existing PHI nodes may reference new
+// PHI nodes.
+static bool
+PHIAreRefEachOther(const iterator_range<BasicBlock::phi_iterator> NewPHIs) {
+ SmallPtrSet<PHINode *, 8> NewPHISet;
+ for (PHINode &PN : NewPHIs)
+ NewPHISet.insert(&PN);
+ for (PHINode &PHI : NewPHIs) {
+ for (Value *V : PHI.incoming_values()) {
+ PHINode *IncPHI = dyn_cast<PHINode>(V);
+ if (IncPHI && IncPHI != &PHI && NewPHISet.contains(IncPHI))
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+static bool replaceIfIdentical(PHINode &PHI, PHINode &ReplPHI) {
+ if (!PHI.isIdenticalToWhenDefined(&ReplPHI))
+ return false;
+ PHI.replaceAllUsesWith(&ReplPHI);
+ PHI.eraseFromParent();
+ return true;
+}
+
+bool EliminateNewDuplicatePHINodes(BasicBlock *BB,
+ BasicBlock::phi_iterator FirstExistingPN) {
+ assert(!PHIAreRefEachOther(make_range(BB->phis().begin(), FirstExistingPN)));
+
+ // Deduplicate new PHIs first to reduce the number of comparisons on the
+ // following new -> existing pass.
+ bool Changed = false;
+ for (auto I = BB->phis().begin(); I != FirstExistingPN; ++I) {
+ for (auto J = std::next(I); J != FirstExistingPN;) {
+ Changed |= replaceIfIdentical(*J++, *I);
+ }
+ }
+
+ // Iterate over existing PHIs and replace identical new PHIs.
+ for (PHINode &ExistingPHI : make_range(FirstExistingPN, BB->phis().end())) {
+ auto I = BB->phis().begin();
+ assert(I != FirstExistingPN); // Should be at least one new PHI.
+ do {
+ Changed |= replaceIfIdentical(*I++, ExistingPHI);
+ } while (I != FirstExistingPN);
+ if (BB->phis().begin() == FirstExistingPN)
+ return Changed;
+ }
+ return Changed;
+}
+
+static void deduplicatePass(ArrayRef<PHINode *> Worklist) {
+ SmallDenseMap<BasicBlock *, unsigned> BBs;
+ for (PHINode *PHI : Worklist) {
+ if (PHI)
+ ++BBs[PHI->getParent()];
+ }
+
+ for (auto [BB, NumNewPHIs] : BBs) {
+ auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs);
+ EliminateNewDuplicatePHINodes(BB, FirstExistingPN);
+ }
+}
+
+void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) {
+ SmallVector<PHINode *, 4> PHIs;
+ RewriteAllUses(&DT, &PHIs);
+ if (PHIs.empty())
+ return;
+
+ simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout());
+ deduplicatePass(PHIs);
+}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6f4bec..88af2cf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10659,7 +10659,8 @@ class InstructionsCompatibilityAnalysis {
static bool isSupportedOpcode(const unsigned Opcode) {
return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
- Opcode == Instruction::UDiv;
+ Opcode == Instruction::UDiv || Opcode == Instruction::And ||
+ Opcode == Instruction::Or || Opcode == Instruction::Xor;
}
/// Identifies the best candidate value, which represents main opcode
@@ -10984,6 +10985,9 @@ public:
case Instruction::Shl:
case Instruction::SDiv:
case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
@@ -19456,7 +19460,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
assert(getNumElements(Cond->getType()) == TrueNumElements &&
"Cannot vectorize Instruction::Select");
- Value *V = Builder.CreateSelect(Cond, True, False);
+ Value *V =
+ Builder.CreateSelectWithUnknownProfile(Cond, True, False, DEBUG_TYPE);
V = FinalShuffle(V, E);
E->VectorizedValue = V;
@@ -23576,18 +23581,19 @@ class HorizontalReduction {
switch (Kind) {
case RecurKind::Or: {
if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy))
- return Builder.CreateSelect(
+ return Builder.CreateSelectWithUnknownProfile(
LHS, ConstantInt::getAllOnesValue(CmpInst::makeCmpResultType(OpTy)),
- RHS, Name);
+ RHS, DEBUG_TYPE, Name);
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
}
case RecurKind::And: {
if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy))
- return Builder.CreateSelect(
+ return Builder.CreateSelectWithUnknownProfile(
LHS, RHS,
- ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), Name);
+ ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)),
+ DEBUG_TYPE, Name);
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
@@ -23608,7 +23614,8 @@ class HorizontalReduction {
if (UseSelect) {
CmpInst::Predicate Pred = llvm::getMinMaxReductionPredicate(Kind);
Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS, Name);
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ return Builder.CreateSelectWithUnknownProfile(Cmp, LHS, RHS, DEBUG_TYPE,
+ Name);
}
[[fallthrough]];
case RecurKind::FMax:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1fea068..0101942 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -635,9 +635,9 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
const VPRecipeBase *R = &VPBB->back();
bool IsSwitch = isa<VPInstruction>(R) &&
cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
- bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) ||
- match(R, m_BranchOnCond(m_VPValue())) ||
- match(R, m_BranchOnCount(m_VPValue(), m_VPValue()));
+ bool IsCondBranch =
+ isa<VPBranchOnMaskRecipe>(R) ||
+ match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()));
(void)IsCondBranch;
(void)IsSwitch;
if (VPBB->getNumSuccessors() == 2 ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fb696be..8ca3bed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1064,6 +1064,7 @@ public:
ResumeForEpilogue,
/// Returns the value for vscale.
VScale,
+ OpsEnd = VScale,
};
/// Returns true if this VPInstruction generates scalar values for all lanes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 81deba2..c0147ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -433,8 +433,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
// We are about to replace the branch to exit the region. Remove the original
// BranchOnCond, if there is any.
DebugLoc LatchDL = DL;
- if (!LatchVPBB->empty() &&
- match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) {
+ if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
LatchVPBB->getTerminator()->eraseFromParent();
}
@@ -480,8 +479,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
PredicatedScalarEvolution &PSE, Loop *TheLoop) {
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
canonicalHeaderAndLatch(HeaderVPBB, VPDT);
@@ -623,8 +621,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan,
}
void VPlanTransforms::createLoopRegions(VPlan &Plan) {
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
createLoopRegion(Plan, HeaderVPB);
@@ -875,8 +872,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
auto *VPBB = cast<VPBasicBlock>(VPB);
for (auto &R : *VPBB) {
- if (R.mayWriteToMemory() &&
- !match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+ if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
return false;
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 577432f..44506f5a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -39,7 +39,6 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
using Base = DominatorTreeBase<VPBlockBase, false>;
public:
- VPDominatorTree() = default;
explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
/// Returns true if \p A properly dominates \p B.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 555efea..b42b049 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -344,6 +344,10 @@ m_Freeze(const Op0_t &Op0) {
return m_VPInstruction<Instruction::Freeze>(Op0);
}
+inline VPInstruction_match<VPInstruction::BranchOnCond> m_BranchOnCond() {
+ return m_VPInstruction<VPInstruction::BranchOnCond>();
+}
+
template <typename Op0_t>
inline VPInstruction_match<VPInstruction::BranchOnCond, Op0_t>
m_BranchOnCond(const Op0_t &Op0) {
@@ -374,6 +378,10 @@ m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
return m_VPInstruction<VPInstruction::ActiveLaneMask>(Op0, Op1, Op2);
}
+inline VPInstruction_match<VPInstruction::BranchOnCount> m_BranchOnCount() {
+ return m_VPInstruction<VPInstruction::BranchOnCount>();
+}
+
template <typename Op0_t, typename Op1_t>
inline VPInstruction_match<VPInstruction::BranchOnCount, Op0_t, Op1_t>
m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8e916772..2368d18 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1154,7 +1154,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
case VPInstruction::ExtractPenultimateElement:
if (VF == ElementCount::getScalable(1))
return InstructionCost::getInvalid();
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
default:
// TODO: Compute cost other VPInstructions once the legacy cost model has
// been retired.
@@ -2855,7 +2855,7 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
case ExpressionTypes::ExtNegatedMulAccReduction:
assert(Opcode == Instruction::Add && "Unexpected opcode");
Opcode = Instruction::Sub;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ExpressionTypes::ExtMulAccReduction: {
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9bb8820..40b7e8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1658,7 +1658,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
auto *Term = &ExitingVPBB->back();
VPValue *Cond;
ScalarEvolution &SE = *PSE.getSE();
- if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+ if (match(Term, m_BranchOnCount()) ||
match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
m_VPValue(), m_VPValue(), m_VPValue()))))) {
// Try to simplify the branch condition if TC <= VF * UF when the latch
@@ -1909,8 +1909,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
VPBuilder &LoopBuilder) {
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
for (VPRecipeBase &R :
@@ -1992,6 +1991,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
return std::make_pair(true, I->getVectorIntrinsicID());
})
+ .Case<VPVectorPointerRecipe>([](auto *I) {
+ // For recipes that do not directly map to LLVM IR instructions,
+ // assign opcodes after the last VPInstruction opcode (which is also
+ // after the last IR Instruction opcode), based on the VPDefID.
+ return std::make_pair(false,
+ VPInstruction::OpsEnd + 1 + I->getVPDefID());
+ })
.Default([](auto *) { return std::nullopt; });
}
@@ -2015,11 +2021,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
static bool canHandle(const VPSingleDefRecipe *Def) {
// We can extend the list of handled recipes in the future,
// provided we account for the data embedded in them while checking for
- // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode,
- // as it is essentially a GEP with different semantics.
- auto C = isa<VPVectorPointerRecipe>(Def)
- ? std::make_pair(false, Instruction::GetElementPtr)
- : getOpcodeOrIntrinsicID(Def);
+ // equality or hashing.
+ auto C = getOpcodeOrIntrinsicID(Def);
// The issue with (Insert|Extract)Value is that the index of the
// insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2058,6 +2061,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
!equal(L->operands(), R->operands()))
return false;
+ assert(getOpcodeOrIntrinsicID(L) && getOpcodeOrIntrinsicID(R) &&
+ "must have valid opcode info for both recipes");
if (auto *LFlags = dyn_cast<VPRecipeWithIRFlags>(L))
if (LFlags->hasPredicate() &&
LFlags->getPredicate() !=
@@ -3021,8 +3026,7 @@ void VPlanTransforms::createInterleaveGroups(
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
for (const auto *IG : InterleaveGroups) {
auto *Start =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
@@ -3398,9 +3402,8 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
VPBuilder Builder(LatchVPBB->getTerminator());
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
- assert(
- match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) &&
- "Terminator must be be BranchOnCond");
+ assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
+ "Terminator must be be BranchOnCond");
VPValue *CondOfEarlyExitingVPBB =
EarlyExitingVPBB->getTerminator()->getOperand(0);
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
@@ -3662,8 +3665,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
return;
#ifndef NDEBUG
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
#endif
SmallVector<VPValue *> VPValues;
@@ -4009,8 +4011,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R) ||
- match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+ if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
continue;
if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5e7f19f..1c4adfc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -259,8 +259,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
- if (match(&R, m_BranchOnCond(m_VPValue())) ||
- match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+ if (match(&R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())))
return;
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 013ea2e..5262af6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -24,6 +24,7 @@
#define DEBUG_TYPE "loop-vectorize"
using namespace llvm;
+using namespace VPlanPatternMatch;
namespace {
class VPlanVerifier {
@@ -198,7 +199,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
}
// EVLIVIncrement is only used by EVLIV & BranchOnCount.
// Having more than two users is unexpected.
- using namespace llvm::VPlanPatternMatch;
if (I->getOpcode() != VPInstruction::Broadcast &&
I->getNumUsers() != 1 &&
(I->getNumUsers() != 2 ||
@@ -479,8 +479,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
}
auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end()));
- if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount &&
- LastInst->getOpcode() != VPInstruction::BranchOnCond)) {
+ if (!match(LastInst, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()))) {
errs() << "VPlan vector loop exit must end with BranchOnCount or "
"BranchOnCond VPInstruction\n";
return false;
@@ -490,8 +489,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
}
bool llvm::verifyVPlanIsValid(const VPlan &Plan, bool VerifyLate) {
- VPDominatorTree VPDT;
- VPDT.recalculate(const_cast<VPlan &>(Plan));
+ VPDominatorTree VPDT(const_cast<VPlan &>(Plan));
VPTypeAnalysis TypeInfo(Plan);
VPlanVerifier Verifier(VPDT, TypeInfo, VerifyLate);
return Verifier.verify(Plan);
diff --git a/llvm/lib/XRay/BlockIndexer.cpp b/llvm/lib/XRay/BlockIndexer.cpp
index f4ba0eb..d0c6853 100644
--- a/llvm/lib/XRay/BlockIndexer.cpp
+++ b/llvm/lib/XRay/BlockIndexer.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/BlockIndexer.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Error BlockIndexer::visit(BufferExtents &) { return Error::success(); }
@@ -89,6 +89,3 @@ Error BlockIndexer::flush() {
CurrentBlock.WallclockTime = nullptr;
return Error::success();
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/BlockPrinter.cpp b/llvm/lib/XRay/BlockPrinter.cpp
index 63a60c3..d85be5b 100644
--- a/llvm/lib/XRay/BlockPrinter.cpp
+++ b/llvm/lib/XRay/BlockPrinter.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/BlockPrinter.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Error BlockPrinter::visit(BufferExtents &R) {
OS << "\n[New Block]\n";
@@ -108,6 +108,3 @@ Error BlockPrinter::visit(EndBufferRecord &R) {
auto E = RP.visit(R);
return E;
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/BlockVerifier.cpp b/llvm/lib/XRay/BlockVerifier.cpp
index 99f255e..e39f6b6 100644
--- a/llvm/lib/XRay/BlockVerifier.cpp
+++ b/llvm/lib/XRay/BlockVerifier.cpp
@@ -10,19 +10,18 @@
#include <bitset>
-namespace llvm {
-namespace xray {
-namespace {
+using namespace llvm;
+using namespace llvm::xray;
-constexpr unsigned long long mask(BlockVerifier::State S) {
+static constexpr unsigned long long mask(BlockVerifier::State S) {
return 1uLL << static_cast<std::size_t>(S);
}
-constexpr std::size_t number(BlockVerifier::State S) {
+static constexpr std::size_t number(BlockVerifier::State S) {
return static_cast<std::size_t>(S);
}
-StringRef recordToString(BlockVerifier::State R) {
+static StringRef recordToString(BlockVerifier::State R) {
switch (R) {
case BlockVerifier::State::BufferExtents:
return "BufferExtents";
@@ -53,6 +52,8 @@ StringRef recordToString(BlockVerifier::State R) {
llvm_unreachable("Unkown state!");
}
+namespace {
+
struct Transition {
BlockVerifier::State From;
std::bitset<number(BlockVerifier::State::StateMax)> ToStates;
@@ -133,7 +134,7 @@ Error BlockVerifier::transition(State To) {
CurrentRecord = To;
return Error::success();
-} // namespace xray
+}
Error BlockVerifier::visit(BufferExtents &) {
return transition(State::BufferExtents);
@@ -201,6 +202,3 @@ Error BlockVerifier::verify() {
}
void BlockVerifier::reset() { CurrentRecord = State::Unknown; }
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRRecordProducer.cpp b/llvm/lib/XRay/FDRRecordProducer.cpp
index 479b710..0f4eed1 100644
--- a/llvm/lib/XRay/FDRRecordProducer.cpp
+++ b/llvm/lib/XRay/FDRRecordProducer.cpp
@@ -10,8 +10,8 @@
#include <cstdint>
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
namespace {
@@ -31,8 +31,9 @@ enum MetadataRecordKinds : uint8_t {
// This is an end marker, used to identify the upper bound for this enum.
EnumEndMarker,
};
+} // namespace
-Expected<std::unique_ptr<Record>>
+static Expected<std::unique_ptr<Record>>
metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
if (T >= static_cast<uint8_t>(MetadataRecordKinds::EnumEndMarker))
@@ -72,12 +73,10 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
llvm_unreachable("Unhandled MetadataRecordKinds enum value");
}
-constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
+static constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
return FirstByte & 0x01u;
}
-} // namespace
-
Expected<std::unique_ptr<Record>>
FileBasedRecordProducer::findNextBufferExtent() {
// We seek one byte at a time until we find a suitable buffer extents metadata
@@ -193,6 +192,3 @@ Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
assert(R != nullptr);
return std::move(R);
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRRecords.cpp b/llvm/lib/XRay/FDRRecords.cpp
index ff315d3..a18f733 100644
--- a/llvm/lib/XRay/FDRRecords.cpp
+++ b/llvm/lib/XRay/FDRRecords.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRRecords.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Error BufferExtents::apply(RecordVisitor &V) { return V.visit(*this); }
Error WallclockRecord::apply(RecordVisitor &V) { return V.visit(*this); }
@@ -61,6 +61,3 @@ StringRef Record::kindToString(RecordKind K) {
}
return "Unknown";
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRTraceExpander.cpp b/llvm/lib/XRay/FDRTraceExpander.cpp
index b68e997..991e6e5 100644
--- a/llvm/lib/XRay/FDRTraceExpander.cpp
+++ b/llvm/lib/XRay/FDRTraceExpander.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRTraceExpander.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
void TraceExpander::resetCurrentRecord() {
if (BuildingRecord)
@@ -126,6 +126,3 @@ Error TraceExpander::flush() {
resetCurrentRecord();
return Error::success();
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FDRTraceWriter.cpp b/llvm/lib/XRay/FDRTraceWriter.cpp
index fb59125..3e320a6 100644
--- a/llvm/lib/XRay/FDRTraceWriter.cpp
+++ b/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -12,8 +12,8 @@
#include "llvm/XRay/FDRTraceWriter.h"
#include <tuple>
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
namespace {
@@ -37,9 +37,10 @@ template <size_t Index> struct IndexedWriter {
return 0;
}
};
+} // namespace
template <uint8_t Kind, class... Values>
-Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
+static Error writeMetadata(support::endian::Writer &OS, Values &&...Ds) {
// The first bit in the first byte of metadata records is always set to 1, so
// we ensure this is the case when we write out the first byte of the record.
uint8_t FirstByte = (static_cast<uint8_t>(Kind) << 1) | uint8_t{0x01u};
@@ -54,8 +55,6 @@ Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
return Error::success();
}
-} // namespace
-
FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
: OS(O, llvm::endianness::native) {
// We need to re-construct a header, by writing the fields we care about for
@@ -146,6 +145,3 @@ Error FDRTraceWriter::visit(FunctionRecord &R) {
OS.write(R.delta());
return Error::success();
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/FileHeaderReader.cpp b/llvm/lib/XRay/FileHeaderReader.cpp
index 6b6daf9..681cef7 100644
--- a/llvm/lib/XRay/FileHeaderReader.cpp
+++ b/llvm/lib/XRay/FileHeaderReader.cpp
@@ -7,12 +7,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FileHeaderReader.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
// Populates the FileHeader reference by reading the first 32 bytes of the file.
-Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
- uint64_t &OffsetPtr) {
+Expected<XRayFileHeader>
+xray::readBinaryFormatHeader(DataExtractor &HeaderExtractor,
+ uint64_t &OffsetPtr) {
// FIXME: Maybe deduce whether the data is little or big-endian using some
// magic bytes in the beginning of the file?
@@ -68,6 +69,3 @@ Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
OffsetPtr += 16;
return std::move(FileHeader);
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/LogBuilderConsumer.cpp b/llvm/lib/XRay/LogBuilderConsumer.cpp
index ffb49f9..f0fc336 100644
--- a/llvm/lib/XRay/LogBuilderConsumer.cpp
+++ b/llvm/lib/XRay/LogBuilderConsumer.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRRecordConsumer.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Error LogBuilderConsumer::consume(std::unique_ptr<Record> R) {
if (!R)
@@ -32,6 +32,3 @@ Error PipelineConsumer::consume(std::unique_ptr<Record> R) {
Result = joinErrors(std::move(Result), R->apply(*V));
return Result;
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/Profile.cpp b/llvm/lib/XRay/Profile.cpp
index 1b340e5..ecb767b 100644
--- a/llvm/lib/XRay/Profile.cpp
+++ b/llvm/lib/XRay/Profile.cpp
@@ -18,8 +18,8 @@
#include "llvm/XRay/Trace.h"
#include <memory>
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Profile::Profile(const Profile &O) {
// We need to re-create all the tries from the original (O), into the current
@@ -46,6 +46,7 @@ struct BlockHeader {
uint32_t Number;
uint64_t Thread;
};
+} // namespace
static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
uint64_t &Offset) {
@@ -115,8 +116,6 @@ static Expected<Profile::Data> readData(DataExtractor &Extractor,
return D;
}
-} // namespace
-
Error Profile::addBlock(Block &&B) {
if (B.PathData.empty())
return make_error<StringError>(
@@ -189,7 +188,7 @@ Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
return Node->ID;
}
-Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
+Profile xray::mergeProfilesByThread(const Profile &L, const Profile &R) {
Profile Merged;
using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
using PathDataMapPtr = std::unique_ptr<PathDataMap>;
@@ -228,7 +227,7 @@ Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
return Merged;
}
-Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
+Profile xray::mergeProfilesByStack(const Profile &L, const Profile &R) {
Profile Merged;
using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
PathDataMap PathData;
@@ -258,7 +257,7 @@ Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
return Merged;
}
-Expected<Profile> loadProfile(StringRef Filename) {
+Expected<Profile> xray::loadProfile(StringRef Filename) {
Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
if (!FdOrErr)
return FdOrErr.takeError();
@@ -322,7 +321,7 @@ struct StackEntry {
} // namespace
-Expected<Profile> profileFromTrace(const Trace &T) {
+Expected<Profile> xray::profileFromTrace(const Trace &T) {
Profile P;
// The implementation of the algorithm re-creates the execution of
@@ -397,6 +396,3 @@ Expected<Profile> profileFromTrace(const Trace &T) {
return P;
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/RecordInitializer.cpp b/llvm/lib/XRay/RecordInitializer.cpp
index 68ab3db..83d5f14 100644
--- a/llvm/lib/XRay/RecordInitializer.cpp
+++ b/llvm/lib/XRay/RecordInitializer.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRRecords.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Error RecordInitializer::visit(BufferExtents &R) {
if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t)))
@@ -426,6 +426,3 @@ Error RecordInitializer::visit(FunctionRecord &R) {
assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset));
return Error::success();
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/RecordPrinter.cpp b/llvm/lib/XRay/RecordPrinter.cpp
index 32d4210..b9b7a16 100644
--- a/llvm/lib/XRay/RecordPrinter.cpp
+++ b/llvm/lib/XRay/RecordPrinter.cpp
@@ -9,8 +9,8 @@
#include "llvm/Support/FormatVariadic.h"
-namespace llvm {
-namespace xray {
+using namespace llvm;
+using namespace llvm::xray;
Error RecordPrinter::visit(BufferExtents &R) {
OS << formatv("<Buffer: size = {0} bytes>", R.size()) << Delim;
@@ -103,6 +103,3 @@ Error RecordPrinter::visit(FunctionRecord &R) {
OS << Delim;
return Error::success();
}
-
-} // namespace xray
-} // namespace llvm
diff --git a/llvm/lib/XRay/Trace.cpp b/llvm/lib/XRay/Trace.cpp
index 74515b1..14a3f01 100644
--- a/llvm/lib/XRay/Trace.cpp
+++ b/llvm/lib/XRay/Trace.cpp
@@ -29,11 +29,9 @@ using namespace llvm;
using namespace llvm::xray;
using llvm::yaml::Input;
-namespace {
-
-Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
- XRayFileHeader &FileHeader,
- std::vector<XRayRecord> &Records) {
+static Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
+ XRayFileHeader &FileHeader,
+ std::vector<XRayRecord> &Records) {
if (Data.size() < 32)
return make_error<StringError>(
"Not enough bytes for an XRay log.",
@@ -265,8 +263,9 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
/// what FunctionRecord instances use, and we no longer need to include the CPU
/// id in the CustomEventRecord.
///
-Error loadFDRLog(StringRef Data, bool IsLittleEndian,
- XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) {
+static Error loadFDRLog(StringRef Data, bool IsLittleEndian,
+ XRayFileHeader &FileHeader,
+ std::vector<XRayRecord> &Records) {
if (Data.size() < 32)
return createStringError(std::make_error_code(std::errc::invalid_argument),
@@ -348,8 +347,8 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian,
return Error::success();
}
-Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
- std::vector<XRayRecord> &Records) {
+static Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
+ std::vector<XRayRecord> &Records) {
YAMLXRayTrace Trace;
Input In(Data);
In >> Trace;
@@ -376,7 +375,6 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
});
return Error::success();
}
-} // namespace
Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);