Diffstat (limited to 'llvm/lib')
 llvm/lib/Analysis/DependenceAnalysis.cpp | 376
 llvm/lib/Analysis/InstructionSimplify.cpp | 111
 llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 6
 llvm/lib/Analysis/StackSafetyAnalysis.cpp | 4
 llvm/lib/AsmParser/LLParser.cpp | 1
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 4
 llvm/lib/CAS/OnDiskGraphDB.cpp | 3
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 60
 llvm/lib/CodeGen/CodeGen.cpp | 1
 llvm/lib/CodeGen/CommandFlags.cpp | 9
 llvm/lib/CodeGen/IfConversion.cpp | 2
 llvm/lib/CodeGen/MIR2Vec.cpp | 166
 llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 6
 llvm/lib/CodeGen/MIRSampleProfile.cpp | 4
 llvm/lib/CodeGen/MachineLICM.cpp | 18
 llvm/lib/CodeGen/RegAllocFast.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3
 llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 15
 llvm/lib/CodeGen/TargetOptionsImpl.cpp | 2
 llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 49
 llvm/lib/ExecutionEngine/Orc/Core.cpp | 1002
 llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp | 2
 llvm/lib/IR/AsmWriter.cpp | 2
 llvm/lib/IR/AutoUpgrade.cpp | 114
 llvm/lib/IR/Instructions.cpp | 2
 llvm/lib/IR/ModuleSummaryIndex.cpp | 12
 llvm/lib/IR/RuntimeLibcalls.cpp | 3
 llvm/lib/LTO/LTO.cpp | 4
 llvm/lib/LTO/LTOBackend.cpp | 4
 llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 15
 llvm/lib/Linker/IRMover.cpp | 10
 llvm/lib/Support/AllocToken.cpp | 50
 llvm/lib/Support/CMakeLists.txt | 1
 llvm/lib/Support/SpecialCaseList.cpp | 2
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2
 llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp | 73
 llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h | 21
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4
 llvm/lib/Target/AMDGPU/CMakeLists.txt | 1
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 140
 llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5
 llvm/lib/Target/AMDGPU/SIInstructions.td | 4
 llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 51
 llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8
 llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 5
 llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 17
 llvm/lib/Target/ARM/ARMSubtarget.h | 1
 llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp | 2
 llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 10
 llvm/lib/Target/ARM/Thumb2InstrInfo.h | 3
 llvm/lib/Target/DirectX/DXILPrepare.cpp | 8
 llvm/lib/Target/Hexagon/CMakeLists.txt | 1
 llvm/lib/Target/Hexagon/Hexagon.h | 3
 llvm/lib/Target/Hexagon/HexagonGenInsert.cpp | 8
 llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 40
 llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp | 334
 llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 71
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 30
 llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 3
 llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 5
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 3
 llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp | 229
 llvm/lib/Target/RISCV/RISCVFeatures.td | 41
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 43
 llvm/lib/Target/RISCV/RISCVISelLowering.h | 1
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 70
 llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 80
 llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 8
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 4
 llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 50
 llvm/lib/Target/TargetMachine.cpp | 1
 llvm/lib/Target/X86/X86ISelLowering.cpp | 7
 llvm/lib/TargetParser/ARMTargetParser.cpp | 6
 llvm/lib/TargetParser/RISCVISAInfo.cpp | 6
 llvm/lib/Transforms/IPO/FunctionImport.cpp | 12
 llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 6
 llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 6
 llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 54
 llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 30
 llvm/lib/Transforms/Instrumentation/AllocToken.cpp | 130
 llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp | 1
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 11
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 117
 llvm/lib/Transforms/Vectorize/VPlan.cpp | 35
 llvm/lib/Transforms/Vectorize/VPlan.h | 38
 llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 2
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 134
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 22
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 27
 95 files changed, 2667 insertions(+), 1441 deletions(-)
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 805b682..853bd66 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -122,11 +122,61 @@ static cl::opt<unsigned> MIVMaxLevelThreshold(
cl::desc("Maximum depth allowed for the recursive algorithm used to "
"explore MIV direction vectors."));
-static cl::opt<bool> RunSIVRoutinesOnly(
- "da-run-siv-routines-only", cl::init(false), cl::ReallyHidden,
- cl::desc("Run only SIV routines and disable others (ZIV, RDIV, and MIV). "
- "The purpose is mainly to exclude the influence of those routines "
- "in regression tests for SIV routines."));
+namespace {
+
+/// Types of dependence test routines.
+enum class DependenceTestType {
+ All,
+ StrongSIV,
+ WeakCrossingSIV,
+ ExactSIV,
+ WeakZeroSIV,
+ ExactRDIV,
+ SymbolicRDIV,
+ GCDMIV,
+ BanerjeeMIV,
+};
+
+} // anonymous namespace
+
+static cl::opt<DependenceTestType> EnableDependenceTest(
+ "da-enable-dependence-test", cl::init(DependenceTestType::All),
+ cl::ReallyHidden,
+ cl::desc("Run only specified dependence test routine and disable others. "
+ "The purpose is mainly to exclude the influence of other "
+ "dependence test routines in regression tests. If set to All, all "
+ "dependence test routines are enabled."),
+ cl::values(clEnumValN(DependenceTestType::All, "all",
+ "Enable all dependence test routines."),
+ clEnumValN(DependenceTestType::StrongSIV, "strong-siv",
+ "Enable only Strong SIV test."),
+ clEnumValN(DependenceTestType::WeakCrossingSIV,
+ "weak-crossing-siv",
+ "Enable only Weak-Crossing SIV test."),
+ clEnumValN(DependenceTestType::ExactSIV, "exact-siv",
+ "Enable only Exact SIV test."),
+ clEnumValN(DependenceTestType::WeakZeroSIV, "weak-zero-siv",
+ "Enable only Weak-Zero SIV test."),
+ clEnumValN(DependenceTestType::ExactRDIV, "exact-rdiv",
+ "Enable only Exact RDIV test."),
+ clEnumValN(DependenceTestType::SymbolicRDIV, "symbolic-rdiv",
+ "Enable only Symbolic RDIV test."),
+ clEnumValN(DependenceTestType::GCDMIV, "gcd-miv",
+ "Enable only GCD MIV test."),
+ clEnumValN(DependenceTestType::BanerjeeMIV, "banerjee-miv",
+ "Enable only Banerjee MIV test.")));
+
+// TODO: This flag is disabled by default because it is still under development.
+// Enable it or delete this flag when the feature is ready.
+static cl::opt<bool> EnableMonotonicityCheck(
+ "da-enable-monotonicity-check", cl::init(false), cl::Hidden,
+ cl::desc("Check if the subscripts are monotonic. If it's not, dependence "
+ "is reported as unknown."));
+
+static cl::opt<bool> DumpMonotonicityReport(
+ "da-dump-monotonicity-report", cl::init(false), cl::Hidden,
+ cl::desc(
+ "When printing analysis, dump the results of monotonicity checks."));
//===----------------------------------------------------------------------===//
// basics
@@ -177,13 +227,196 @@ void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
+namespace {
+
+/// The property of monotonicity of a SCEV. To define the monotonicity, assume
+/// a SCEV defined within N-nested loops. Let i_k denote the iteration number
+/// of the k-th loop. Then we can regard the SCEV as an N-ary function:
+///
+/// F(i_1, i_2, ..., i_N)
+///
+/// The domain of i_k is the closed range [0, BTC_k], where BTC_k is the
+/// backedge-taken count of the k-th loop.
+///
+/// A function F is said to be "monotonically increasing with respect to the
+/// k-th loop" if x <= y implies the following condition:
+///
+/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) <=
+/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
+///
+/// where i_1, ..., i_{k-1}, i_{k+1}, ..., i_N, x, and y are elements of their
+/// respective domains.
+///
+/// Likewise F is "monotonically decreasing with respect to the k-th loop"
+/// if x <= y implies
+///
+/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) >=
+/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
+///
+/// A function F that is monotonically increasing or decreasing with respect to
+/// the k-th loop is simply called "monotonic with respect to the k-th loop".
+///
+/// A function F is said to be "multivariate monotonic" when it is monotonic
+/// with respect to all of the N loops.
+///
+/// Since integer comparison can be either signed or unsigned, we need to
+/// distinguish monotonicity in the signed sense from that in the unsigned
+/// sense. Note that the inequality "x <= y" merely indicates loop progression
+/// and is not affected by the difference between signed and unsigned order.
+///
+/// Currently we only consider monotonicity in a signed sense.
+enum class SCEVMonotonicityType {
+ /// We don't know anything about the monotonicity of the SCEV.
+ Unknown,
+
+ /// The SCEV is loop-invariant with respect to the outermost loop. In other
+ /// words, the function F corresponding to the SCEV is a constant function.
+ Invariant,
+
+ /// The function F corresponding to the SCEV is multivariate monotonic in a
+ /// signed sense. Note that the multivariate monotonic function may also be a
+ /// constant function. The order employed in the definition of monotonicity
+ /// is not strict order.
+ MultivariateSignedMonotonic,
+};
+
+struct SCEVMonotonicity {
+ SCEVMonotonicity(SCEVMonotonicityType Type,
+ const SCEV *FailurePoint = nullptr);
+
+ SCEVMonotonicityType getType() const { return Type; }
+
+ const SCEV *getFailurePoint() const { return FailurePoint; }
+
+ bool isUnknown() const { return Type == SCEVMonotonicityType::Unknown; }
+
+ void print(raw_ostream &OS, unsigned Depth) const;
+
+private:
+ SCEVMonotonicityType Type;
+
+ /// The subexpression that caused Unknown. Mainly for debugging purposes.
+ const SCEV *FailurePoint;
+};
+
+/// Check the monotonicity of a SCEV. Since dependence tests (SIV, MIV, etc.)
+/// assume that subscript expressions are (multivariate) monotonic, we need to
+/// verify this property before applying those tests. Violating this assumption
+/// may cause them to produce incorrect results.
+struct SCEVMonotonicityChecker
+ : public SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity> {
+
+ SCEVMonotonicityChecker(ScalarEvolution *SE) : SE(SE) {}
+
+ /// Check the monotonicity of \p Expr. \p Expr must be integer type. If \p
+ /// OutermostLoop is not null, \p Expr must be defined in \p OutermostLoop or
+ /// one of its nested loops.
+ SCEVMonotonicity checkMonotonicity(const SCEV *Expr,
+ const Loop *OutermostLoop);
+
+private:
+ ScalarEvolution *SE;
+
+ /// The outermost loop that DA is analyzing.
+ const Loop *OutermostLoop;
+
+ /// A helper to classify \p Expr as either Invariant or Unknown.
+ SCEVMonotonicity invariantOrUnknown(const SCEV *Expr);
+
+ /// Return true if \p Expr is loop-invariant with respect to the outermost
+ /// loop.
+ bool isLoopInvariant(const SCEV *Expr) const;
+
+ /// A helper to create an Unknown SCEVMonotonicity.
+ SCEVMonotonicity createUnknown(const SCEV *FailurePoint) {
+ return SCEVMonotonicity(SCEVMonotonicityType::Unknown, FailurePoint);
+ }
+
+ SCEVMonotonicity visitAddRecExpr(const SCEVAddRecExpr *Expr);
+
+ SCEVMonotonicity visitConstant(const SCEVConstant *) {
+ return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
+ }
+ SCEVMonotonicity visitVScale(const SCEVVScale *) {
+ return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
+ }
+
+ // TODO: Handle more cases.
+ SCEVMonotonicity visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitAddExpr(const SCEVAddExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitMulExpr(const SCEVMulExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitUDivExpr(const SCEVUDivExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitSMinExpr(const SCEVSMinExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitUMinExpr(const SCEVUMinExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitUnknown(const SCEVUnknown *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+ SCEVMonotonicity visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return invariantOrUnknown(Expr);
+ }
+
+ friend struct SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity>;
+};
+
+} // anonymous namespace
+
// Used to test the dependence analyzer.
// Looks through the function, noting instructions that may access memory.
// Calls depends() on every possible pair and prints out the result.
// Ignores all other instructions.
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
- ScalarEvolution &SE, bool NormalizeResults) {
+ ScalarEvolution &SE, LoopInfo &LI,
+ bool NormalizeResults) {
auto *F = DA->getFunction();
+
+ if (DumpMonotonicityReport) {
+ SCEVMonotonicityChecker Checker(&SE);
+ OS << "Monotonicity check:\n";
+ for (Instruction &Inst : instructions(F)) {
+ if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
+ continue;
+ Value *Ptr = getLoadStorePointerOperand(&Inst);
+ const Loop *L = LI.getLoopFor(Inst.getParent());
+ const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L);
+ const SCEV *AccessFn = SE.removePointerBase(PtrSCEV);
+ SCEVMonotonicity Mon = Checker.checkMonotonicity(AccessFn, L);
+ OS.indent(2) << "Inst: " << Inst << "\n";
+ OS.indent(4) << "Expr: " << *AccessFn << "\n";
+ Mon.print(OS, 4);
+ }
+ OS << "\n";
+ }
+
for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
++SrcI) {
if (SrcI->mayReadOrWriteMemory()) {
@@ -235,7 +468,8 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
const Module *) const {
dumpExampleDependence(
- OS, info.get(), getAnalysis<ScalarEvolutionWrapperPass>().getSE(), false);
+ OS, info.get(), getAnalysis<ScalarEvolutionWrapperPass>().getSE(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), false);
}
PreservedAnalyses
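
As a concrete illustration of the reasoning used by visitAddRecExpr further below: an affine recurrence {X,+,Y}<nsw> advances by exactly Y on every backedge, so with no signed wrap its values form a signed-monotone chain. A minimal standalone sketch of that fact (plain C++ with illustrative values, not the SCEV API):

    #include <cassert>
    #include <cstdint>

    // Model an affine AddRec {Start,+,Step}<nsw>: F(i) = Start + i * Step.
    // Assuming no signed wrap, consecutive values differ by exactly Step,
    // so the sequence is non-decreasing for Step >= 0 and non-increasing
    // for Step <= 0.
    int64_t addRecValue(int64_t Start, int64_t Step, uint64_t I) {
      return Start + static_cast<int64_t>(I) * Step;
    }

    int main() {
      const int64_t Start = 40, Step = 12; // illustrative, loop-invariant
      const uint64_t BTC = 100;            // backedge-taken count
      for (uint64_t I = 0; I < BTC; ++I)
        assert(addRecValue(Start, Step, I) <= addRecValue(Start, Step, I + 1));
      return 0;
    }
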
@@ -244,7 +478,7 @@ DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
<< "':\n";
dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F),
FAM.getResult<ScalarEvolutionAnalysis>(F),
- NormalizeResults);
+ FAM.getResult<LoopAnalysis>(F), NormalizeResults);
return PreservedAnalyses::all();
}
@@ -671,6 +905,81 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
}
//===----------------------------------------------------------------------===//
+// SCEVMonotonicity
+
+SCEVMonotonicity::SCEVMonotonicity(SCEVMonotonicityType Type,
+ const SCEV *FailurePoint)
+ : Type(Type), FailurePoint(FailurePoint) {
+ assert(
+ ((Type == SCEVMonotonicityType::Unknown) == (FailurePoint != nullptr)) &&
+ "FailurePoint must be provided iff Type is Unknown");
+}
+
+void SCEVMonotonicity::print(raw_ostream &OS, unsigned Depth) const {
+ OS.indent(Depth) << "Monotonicity: ";
+ switch (Type) {
+ case SCEVMonotonicityType::Unknown:
+ assert(FailurePoint && "FailurePoint must be provided for Unknown");
+ OS << "Unknown\n";
+ OS.indent(Depth) << "Reason: " << *FailurePoint << "\n";
+ break;
+ case SCEVMonotonicityType::Invariant:
+ OS << "Invariant\n";
+ break;
+ case SCEVMonotonicityType::MultivariateSignedMonotonic:
+ OS << "MultivariateSignedMonotonic\n";
+ break;
+ }
+}
+
+bool SCEVMonotonicityChecker::isLoopInvariant(const SCEV *Expr) const {
+ return !OutermostLoop || SE->isLoopInvariant(Expr, OutermostLoop);
+}
+
+SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
+ if (isLoopInvariant(Expr))
+ return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
+ return createUnknown(Expr);
+}
+
+SCEVMonotonicity
+SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
+ const Loop *OutermostLoop) {
+ assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
+ this->OutermostLoop = OutermostLoop;
+ return visit(Expr);
+}
+
+/// We only care about an affine AddRec at the moment. For an affine AddRec,
+/// the monotonicity can be inferred from its nowrap property. For example, let
+/// X and Y be loop-invariant, and assume Y is non-negative. An AddRec
+/// {X,+.Y}<nsw> implies:
+///
+/// X <=s (X + Y) <=s ((X + Y) + Y) <=s ...
+///
+/// Thus, we can conclude that the AddRec is monotonically increasing with
+/// respect to the associated loop in a signed sense. Similar reasoning
+/// applies when Y is non-positive, leading to a monotonically decreasing
+/// AddRec.
+SCEVMonotonicity
+SCEVMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (!Expr->isAffine() || !Expr->hasNoSignedWrap())
+ return createUnknown(Expr);
+
+ const SCEV *Start = Expr->getStart();
+ const SCEV *Step = Expr->getStepRecurrence(*SE);
+
+ SCEVMonotonicity StartMon = visit(Start);
+ if (StartMon.isUnknown())
+ return StartMon;
+
+ if (!isLoopInvariant(Step))
+ return createUnknown(Expr);
+
+ return SCEVMonotonicity(SCEVMonotonicityType::MultivariateSignedMonotonic);
+}
+
+//===----------------------------------------------------------------------===//
// DependenceInfo methods
// For debugging purposes. Dumps a dependence to OS.
@@ -1273,6 +1582,13 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
return nullptr;
}
+/// Returns true iff \p Test is enabled.
+static bool isDependenceTestEnabled(DependenceTestType Test) {
+ if (EnableDependenceTest == DependenceTestType::All)
+ return true;
+ return EnableDependenceTest == Test;
+}
+
// testZIV -
// When we have a pair of subscripts of the form [c1] and [c2],
// where c1 and c2 are both loop invariant, we attack it using
@@ -1334,6 +1650,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
const Loop *CurDstLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
+ if (!isDependenceTestEnabled(DependenceTestType::StrongSIV))
+ return false;
+
LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
@@ -1468,6 +1787,9 @@ bool DependenceInfo::weakCrossingSIVtest(
const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
FullDependence &Result, Constraint &NewConstraint,
const SCEV *&SplitIter) const {
+ if (!isDependenceTestEnabled(DependenceTestType::WeakCrossingSIV))
+ return false;
+
LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
@@ -1726,6 +2048,9 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const Loop *CurDstLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
+ if (!isDependenceTestEnabled(DependenceTestType::ExactSIV))
+ return false;
+
LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
@@ -1905,6 +2230,9 @@ bool DependenceInfo::weakZeroSrcSIVtest(
const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst,
const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
FullDependence &Result, Constraint &NewConstraint) const {
+ if (!isDependenceTestEnabled(DependenceTestType::WeakZeroSIV))
+ return false;
+
// For the WeakSIV test, it's possible the loop isn't common to
// the Src and Dst loops. If it isn't, then there's no need to
// record a direction.
@@ -2013,6 +2341,9 @@ bool DependenceInfo::weakZeroDstSIVtest(
const SCEV *SrcCoeff, const SCEV *SrcConst, const SCEV *DstConst,
const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
FullDependence &Result, Constraint &NewConstraint) const {
+ if (!isDependenceTestEnabled(DependenceTestType::WeakZeroSIV))
+ return false;
+
// For the WeakSIV test, it's possible the loop isn't common to the
// Src and Dst loops. If it isn't, then there's no need to record a direction.
LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
@@ -2096,8 +2427,9 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
const Loop *SrcLoop, const Loop *DstLoop,
FullDependence &Result) const {
- if (RunSIVRoutinesOnly)
+ if (!isDependenceTestEnabled(DependenceTestType::ExactRDIV))
return false;
+
LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
@@ -2242,8 +2574,9 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
const SCEV *C1, const SCEV *C2,
const Loop *Loop1,
const Loop *Loop2) const {
- if (RunSIVRoutinesOnly)
+ if (!isDependenceTestEnabled(DependenceTestType::SymbolicRDIV))
return false;
+
++SymbolicRDIVapplications;
LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
@@ -2557,8 +2890,9 @@ bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
// to "a common divisor".
bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
FullDependence &Result) const {
- if (RunSIVRoutinesOnly)
+ if (!isDependenceTestEnabled(DependenceTestType::GCDMIV))
return false;
+
LLVM_DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
@@ -2725,8 +3059,9 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
const SmallBitVector &Loops,
FullDependence &Result) const {
- if (RunSIVRoutinesOnly)
+ if (!isDependenceTestEnabled(DependenceTestType::BanerjeeMIV))
return false;
+
LLVM_DEBUG(dbgs() << "starting Banerjee\n");
++BanerjeeApplications;
LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
@@ -3488,10 +3823,19 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// resize Pair to contain as many pairs of subscripts as the delinearization
// has found, and then initialize the pairs following the delinearization.
Pair.resize(Size);
+ SCEVMonotonicityChecker MonChecker(SE);
+ const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
for (int I = 0; I < Size; ++I) {
Pair[I].Src = SrcSubscripts[I];
Pair[I].Dst = DstSubscripts[I];
unifySubscriptType(&Pair[I]);
+
+ if (EnableMonotonicityCheck) {
+ if (MonChecker.checkMonotonicity(Pair[I].Src, OutermostLoop).isUnknown())
+ return false;
+ if (MonChecker.checkMonotonicity(Pair[I].Dst, OutermostLoop).isUnknown())
+ return false;
+ }
}
return true;
@@ -3824,6 +4168,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
Pair[0].Src = SrcEv;
Pair[0].Dst = DstEv;
+ SCEVMonotonicityChecker MonChecker(SE);
+ const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
+ if (EnableMonotonicityCheck)
+ if (MonChecker.checkMonotonicity(Pair[0].Src, OutermostLoop).isUnknown() ||
+ MonChecker.checkMonotonicity(Pair[0].Dst, OutermostLoop).isUnknown())
+ return std::make_unique<Dependence>(Src, Dst,
+ SCEVUnionPredicate(Assume, *SE));
+
if (Delinearize) {
if (tryDelinearize(Src, Dst, Pair)) {
LLVM_DEBUG(dbgs() << " delinearized\n");
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index dc813f6..b573023 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
return nullptr;
}
+/// Look for the following pattern and simplify %to_fold to %identicalPhi.
+/// Here %phi, %to_fold, and %phi.next all compute the same value as
+/// %identicalPhi, and hence the select instruction %to_fold can be folded
+/// into %identicalPhi.
+///
+/// BB1:
+/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+/// ...
+/// %identicalPhi.next = select %cmp, %val, %identicalPhi
+/// (or select %cmp, %identicalPhi, %val)
+/// %to_fold = select %cmp2, %identicalPhi, %phi
+/// %phi.next = select %cmp, %val, %to_fold
+/// (or select %cmp, %to_fold, %val)
+///
+/// Prove that %phi and %identicalPhi are the same by induction:
+///
+/// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+/// Inductive case:
+/// Suppose %phi and %identicalPhi are equal at iteration i.
+/// Their values at iteration i+1 are %phi.next and %identicalPhi.next,
+/// which can differ only when %cmp is false and the corresponding operands
+/// %to_fold and %identicalPhi differ (the reasoning is analogous for the
+/// alternative select form shown in parentheses above).
+///
+/// The only case in which %to_fold and %identicalPhi could differ is when %cmp2
+/// is false and %to_fold is %phi, which contradicts our inductive hypothesis
+/// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+/// always equal at iteration i+1.
+bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
+ if (PN.getParent() != IdenticalPN.getParent())
+ return false;
+ if (PN.getNumIncomingValues() != 2)
+ return false;
+
+ // Check that only the backedge incoming value is different.
+ unsigned DiffVals = 0;
+ BasicBlock *DiffValBB = nullptr;
+ for (unsigned i = 0; i < 2; i++) {
+ BasicBlock *PredBB = PN.getIncomingBlock(i);
+ if (PN.getIncomingValueForBlock(PredBB) !=
+ IdenticalPN.getIncomingValueForBlock(PredBB)) {
+ DiffVals++;
+ DiffValBB = PredBB;
+ }
+ }
+ if (DiffVals != 1)
+ return false;
+ // Now check that the backedge incoming values are two select
+ // instructions with the same condition. Either their true
+ // values are the same, or their false values are the same.
+ auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB));
+ auto *IdenticalSI =
+ dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
+ if (!SI || !IdenticalSI)
+ return false;
+ if (SI->getCondition() != IdenticalSI->getCondition())
+ return false;
+
+ SelectInst *SIOtherVal = nullptr;
+ Value *IdenticalSIOtherVal = nullptr;
+ if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
+ IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+ } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
+ IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+ } else {
+ return false;
+ }
+
+ // Now check that the other values in select, i.e., %to_fold and
+ // %identicalPhi, are essentially the same value.
+ if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN)
+ return false;
+ if (!(SIOtherVal->getTrueValue() == &IdenticalPN &&
+ SIOtherVal->getFalseValue() == &PN) &&
+ !(SIOtherVal->getTrueValue() == &PN &&
+ SIOtherVal->getFalseValue() == &IdenticalPN))
+ return false;
+ return true;
+}
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
if (Imp)
return *Imp ? TrueVal : FalseVal;
-
+ // Look for same PHIs in the true and false values.
+ if (auto *TruePHI = dyn_cast<PHINode>(TrueVal))
+ if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) {
+ if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI))
+ return FalseVal;
+ if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI))
+ return TrueVal;
+ }
return nullptr;
}
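
The induction argument documented above for isSimplifierIdenticalPHI can also be sanity-checked by brute force: simulate both recurrences for every combination of %cmp/%cmp2 outcomes and confirm they never diverge. A self-contained sketch (hypothetical values X and Val, modeling the "select %cmp, %val, ..." form):

    #include <cassert>

    int main() {
      const int X = 1, Val = 2; // arbitrary distinct values
      // 4 iterations, 2 condition bits each -> 256 condition sequences.
      for (unsigned Mask = 0; Mask < (1u << 8); ++Mask) {
        int IdenticalPhi = X, Phi = X;
        for (unsigned I = 0; I < 4; ++I) {
          bool Cmp = Mask & (1u << (2 * I));
          bool Cmp2 = Mask & (1u << (2 * I + 1));
          int IdenticalPhiNext = Cmp ? Val : IdenticalPhi;
          int ToFold = Cmp2 ? IdenticalPhi : Phi;
          int PhiNext = Cmp ? Val : ToFold;
          IdenticalPhi = IdenticalPhiNext;
          Phi = PhiNext;
          assert(Phi == IdenticalPhi && "recurrences must stay equal");
        }
      }
      return 0;
    }
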
@@ -5106,32 +5196,33 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
return Ptr;
// The following transforms are only safe if the ptrtoint cast
- // doesn't truncate the pointers.
- if (Indices[0]->getType()->getScalarSizeInBits() ==
- Q.DL.getPointerSizeInBits(AS)) {
+ // doesn't truncate the address of the pointers. The non-address bits
+ // must be the same, as the underlying objects are the same.
+ if (Indices[0]->getType()->getScalarSizeInBits() >=
+ Q.DL.getAddressSizeInBits(AS)) {
auto CanSimplify = [GEPTy, &P, Ptr]() -> bool {
return P->getType() == GEPTy &&
getUnderlyingObject(P) == getUnderlyingObject(Ptr);
};
// getelementptr V, (sub P, V) -> P if P points to a type of size 1.
if (TyAllocSize == 1 &&
- match(Indices[0],
- m_Sub(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Specific(Ptr)))) &&
+ match(Indices[0], m_Sub(m_PtrToIntOrAddr(m_Value(P)),
+ m_PtrToIntOrAddr(m_Specific(Ptr)))) &&
CanSimplify())
return P;
// getelementptr V, (ashr (sub P, V), C) -> P if P points to a type of
// size 1 << C.
- if (match(Indices[0], m_AShr(m_Sub(m_PtrToInt(m_Value(P)),
- m_PtrToInt(m_Specific(Ptr))),
+ if (match(Indices[0], m_AShr(m_Sub(m_PtrToIntOrAddr(m_Value(P)),
+ m_PtrToIntOrAddr(m_Specific(Ptr))),
m_ConstantInt(C))) &&
TyAllocSize == 1ULL << C && CanSimplify())
return P;
// getelementptr V, (sdiv (sub P, V), C) -> P if P points to a type of
// size C.
- if (match(Indices[0], m_SDiv(m_Sub(m_PtrToInt(m_Value(P)),
- m_PtrToInt(m_Specific(Ptr))),
+ if (match(Indices[0], m_SDiv(m_Sub(m_PtrToIntOrAddr(m_Value(P)),
+ m_PtrToIntOrAddr(m_Specific(Ptr))),
m_SpecificInt(TyAllocSize))) &&
CanSimplify())
return P;
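
All three GEP folds above reduce to the same address identity: adding the (possibly pre-scaled) byte difference P - V back onto V yields P, provided the cast preserves the full address. A small sketch of just that arithmetic, using illustrative 8-byte elements (the ashr-by-3 case):

    #include <cassert>
    #include <cstdint>

    int main() {
      long long Buf[16] = {};
      long long *V = &Buf[2], *P = &Buf[9];
      uintptr_t VI = reinterpret_cast<uintptr_t>(V);
      uintptr_t PI = reinterpret_cast<uintptr_t>(P);
      // getelementptr i64, ptr V, (ashr (sub P, V), 3) -> P
      intptr_t Idx = static_cast<intptr_t>(PI - VI) >> 3;
      assert(V + Idx == P);
      return 0;
    }
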
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index a60a4bb..6cdf51a 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -1100,12 +1100,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
for (auto &GlobalList : Index) {
// Ignore entries for references that are undefined in the current module.
- if (GlobalList.second.SummaryList.empty())
+ if (GlobalList.second.getSummaryList().empty())
continue;
- assert(GlobalList.second.SummaryList.size() == 1 &&
+ assert(GlobalList.second.getSummaryList().size() == 1 &&
"Expected module's index to have one summary per GUID");
- auto &Summary = GlobalList.second.SummaryList[0];
+ auto &Summary = GlobalList.second.getSummaryList()[0];
if (!IsThinLTO) {
Summary->setNotEligibleToImport();
continue;
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 5e94e0b..5e92ca1 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -1136,7 +1136,7 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
if (!AreStatisticsEnabled())
return;
for (auto &GVS : Index)
- for (auto &GV : GVS.second.SummaryList)
+ for (auto &GV : GVS.second.getSummaryList())
if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get()))
Stat += FS->paramAccesses().size();
};
@@ -1147,7 +1147,7 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
// Convert the ModuleSummaryIndex to a FunctionMap
for (auto &GVS : Index) {
- for (auto &GV : GVS.second.SummaryList) {
+ for (auto &GV : GVS.second.getSummaryList()) {
FunctionSummary *FS = dyn_cast<FunctionSummary>(GV.get());
if (!FS || FS->paramAccesses().empty())
continue;
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index cf63285..f71a534 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -451,6 +451,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
UpgradeModuleFlags(*M);
UpgradeNVVMAnnotations(*M);
UpgradeSectionAttributes(*M);
+ copyModuleAttrToFunctions(*M);
if (!Slots)
return false;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index aaee1f0..cf7efbfa 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7143,6 +7143,8 @@ Error BitcodeReader::materializeModule() {
UpgradeARCRuntime(*TheModule);
+ copyModuleAttrToFunctions(*TheModule);
+
return Error::success();
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8ff3aa9..61aa7c2f5 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -235,7 +235,7 @@ public:
return;
for (const auto &GUIDSummaryLists : *Index)
// Examine all summaries for this GUID.
- for (auto &Summary : GUIDSummaryLists.second.SummaryList)
+ for (auto &Summary : GUIDSummaryLists.second.getSummaryList())
if (auto FS = dyn_cast<FunctionSummary>(Summary.get())) {
// For each call in the function summary, see if the call
// is to a GUID (which means it is for an indirect call,
@@ -587,7 +587,7 @@ public:
}
} else {
for (auto &Summaries : Index)
- for (auto &Summary : Summaries.second.SummaryList)
+ for (auto &Summary : Summaries.second.getSummaryList())
Callback({Summaries.first, Summary.get()}, false);
}
}
diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
index 72bb98c..64cbe9d 100644
--- a/llvm/lib/CAS/OnDiskGraphDB.cpp
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -836,6 +836,7 @@ uint64_t DataRecordHandle::getDataSize() const {
case DataSizeFlags::Uses8B:
return support::endian::read64le(DataSizePtr);
}
+ llvm_unreachable("Unknown DataSizeFlags enum");
}
void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset) const {
@@ -863,6 +864,7 @@ uint32_t DataRecordHandle::getNumRefs() const {
case NumRefsFlags::Uses8B:
return support::endian::read64le(NumRefsPtr);
}
+ llvm_unreachable("Unknown NumRefsFlags enum");
}
void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset) const {
@@ -1270,6 +1272,7 @@ Expected<bool> OnDiskGraphDB::isMaterialized(ObjectID Ref) {
return FaultInResult.takeError();
return true;
}
+ llvm_unreachable("Unknown ObjectPresence enum");
}
Expected<OnDiskGraphDB::ObjectPresence>
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e2af0c5..fefde64f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1438,6 +1438,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
BBFreqEnabled,
BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
+ // Use static_cast to avoid breakage of tests on Windows.
static_cast<bool>(BBAddrMapSkipEmitBBEntries),
HasCalls,
false};
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index f0f0861..c7d45897 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -566,32 +566,54 @@ bool DwarfExpression::addExpression(
case dwarf::DW_OP_LLVM_extract_bits_zext: {
unsigned SizeInBits = Op->getArg(1);
unsigned BitOffset = Op->getArg(0);
+ unsigned DerefSize = 0;
+ // Operations are done in the DWARF "generic type" whose size
+ // is the size of a pointer.
+ unsigned PtrSizeInBytes = CU.getAsmPrinter()->MAI->getCodePointerSize();
// If we have a memory location then dereference to get the value, though
// we have to make sure we don't dereference any bytes past the end of the
// object.
if (isMemoryLocation()) {
- emitOp(dwarf::DW_OP_deref_size);
- emitUnsigned(alignTo(BitOffset + SizeInBits, 8) / 8);
+ DerefSize = alignTo(BitOffset + SizeInBits, 8) / 8;
+ if (DerefSize == PtrSizeInBytes) {
+ emitOp(dwarf::DW_OP_deref);
+ } else {
+ emitOp(dwarf::DW_OP_deref_size);
+ emitUnsigned(DerefSize);
+ }
}
- // Extract the bits by a shift left (to shift out the bits after what we
- // want to extract) followed by shift right (to shift the bits to position
- // 0 and also sign/zero extend). These operations are done in the DWARF
- // "generic type" whose size is the size of a pointer.
- unsigned PtrSizeInBytes = CU.getAsmPrinter()->MAI->getCodePointerSize();
- unsigned LeftShift = PtrSizeInBytes * 8 - (SizeInBits + BitOffset);
- unsigned RightShift = LeftShift + BitOffset;
- if (LeftShift) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(LeftShift);
- emitOp(dwarf::DW_OP_shl);
- }
- if (RightShift) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(RightShift);
- emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext ? dwarf::DW_OP_shra
- : dwarf::DW_OP_shr);
+ // If a dereference was emitted for an unsigned value, and
+ // there's no bit offset, then a bit of optimization is
+ // possible.
+ if (OpNum == dwarf::DW_OP_LLVM_extract_bits_zext && BitOffset == 0) {
+ if (8 * DerefSize == SizeInBits) {
+ // The correct value is already on the stack.
+ } else {
+ // No need to shift, we can just mask off the desired bits.
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned((1u << SizeInBits) - 1);
+ emitOp(dwarf::DW_OP_and);
+ }
+ } else {
+ // Extract the bits by a shift left (to shift out the bits after what we
+ // want to extract) followed by shift right (to shift the bits to
+ // position 0 and also sign/zero extend).
+ unsigned LeftShift = PtrSizeInBytes * 8 - (SizeInBits + BitOffset);
+ unsigned RightShift = LeftShift + BitOffset;
+ if (LeftShift) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(LeftShift);
+ emitOp(dwarf::DW_OP_shl);
+ }
+ if (RightShift) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(RightShift);
+ emitOp(OpNum == dwarf::DW_OP_LLVM_extract_bits_sext
+ ? dwarf::DW_OP_shra
+ : dwarf::DW_OP_shr);
+ }
}
// The value is now at the top of the stack, so set the location to
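
The operator sequences emitted above can be modeled directly on a 64-bit "generic type": a left shift followed by a right shift extracts bits [Off, Off+Size) with zero or sign extension, and when Off is zero the unsigned extract degenerates to a mask (or to nothing at all when the dereference already produced exactly Size bits). A standalone model of that logic, not the emitter itself:

    #include <cassert>
    #include <cstdint>

    // DW_OP_constu L; DW_OP_shl; DW_OP_constu L+Off; DW_OP_shr (or shra).
    uint64_t extractZExt(uint64_t V, unsigned Off, unsigned Size) {
      unsigned L = 64 - (Size + Off);
      return (V << L) >> (L + Off);
    }

    int64_t extractSExt(uint64_t V, unsigned Off, unsigned Size) {
      unsigned L = 64 - (Size + Off);
      return static_cast<int64_t>(V << L) >> (L + Off);
    }

    int main() {
      uint64_t V = 0xABCD;
      // Zero-extend extract of bits [4, 12).
      assert(extractZExt(V, 4, 8) == 0xBC);
      // With Off == 0, masking gives the same result:
      // DW_OP_constu (1 << Size) - 1; DW_OP_and.
      assert(extractZExt(V, 0, 8) == (V & 0xFF));
      // Sign-extend extract: bit 11 of V is set, so the result is negative.
      assert(extractSExt(V, 4, 8) == static_cast<int8_t>(0xBC));
      return 0;
    }
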
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index c438eae..9795a0b 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -98,6 +98,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineUniformityAnalysisPassPass(Registry);
initializeMIR2VecVocabLegacyAnalysisPass(Registry);
initializeMIR2VecVocabPrinterLegacyPassPass(Registry);
+ initializeMIR2VecPrinterLegacyPassPass(Registry);
initializeMachineUniformityInfoPrinterPassPass(Registry);
initializeMachineVerifierLegacyPassPass(Registry);
initializeObjCARCContractLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 0522698..c1365f4 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -64,7 +64,6 @@ CGOPT_EXP(uint64_t, LargeDataThreshold)
CGOPT(ExceptionHandling, ExceptionModel)
CGOPT_EXP(CodeGenFileType, FileType)
CGOPT(FramePointerKind, FramePointerUsage)
-CGOPT(bool, EnableUnsafeFPMath)
CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
CGOPT(bool, EnableNoSignedZerosFPMath)
@@ -219,12 +218,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Enable frame pointer elimination")));
CGBINDOPT(FramePointerUsage);
- static cl::opt<bool> EnableUnsafeFPMath(
- "enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::init(false));
- CGBINDOPT(EnableUnsafeFPMath);
-
static cl::opt<bool> EnableNoInfsFPMath(
"enable-no-infs-fp-math",
cl::desc("Enable FP math optimizations that assume no +-Infs"),
@@ -552,7 +545,6 @@ TargetOptions
codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
TargetOptions Options;
Options.AllowFPOpFusion = getFuseFPOps();
- Options.UnsafeFPMath = getEnableUnsafeFPMath();
Options.NoInfsFPMath = getEnableNoInfsFPMath();
Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
@@ -706,7 +698,6 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
if (getStackRealign())
NewAttrs.addAttribute("stackrealign");
- HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math");
HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index f80e1e8..3ac6d2a 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -1498,7 +1498,7 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
// Before stepping forward past MI, remember which regs were live
// before MI. This is needed to set the Undef flag only when reg is
// dead.
- SparseSet<MCPhysReg, identity<MCPhysReg>> LiveBeforeMI;
+ SparseSet<MCPhysReg, MCPhysReg> LiveBeforeMI;
LiveBeforeMI.setUniverse(TRI->getNumRegs());
for (unsigned Reg : Redefs)
LiveBeforeMI.insert(Reg);
diff --git a/llvm/lib/CodeGen/MIR2Vec.cpp b/llvm/lib/CodeGen/MIR2Vec.cpp
index 5c78d98..99be1fc0 100644
--- a/llvm/lib/CodeGen/MIR2Vec.cpp
+++ b/llvm/lib/CodeGen/MIR2Vec.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIR2Vec.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Module.h"
@@ -29,20 +30,30 @@ using namespace mir2vec;
STATISTIC(MIRVocabMissCounter,
"Number of lookups to MIR entities not present in the vocabulary");
-cl::OptionCategory llvm::mir2vec::MIR2VecCategory("MIR2Vec Options");
+namespace llvm {
+namespace mir2vec {
+cl::OptionCategory MIR2VecCategory("MIR2Vec Options");
// FIXME: Use a default vocab when not specified
static cl::opt<std::string>
VocabFile("mir2vec-vocab-path", cl::Optional,
cl::desc("Path to the vocabulary file for MIR2Vec"), cl::init(""),
cl::cat(MIR2VecCategory));
-cl::opt<float>
- llvm::mir2vec::OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
- cl::desc("Weight for machine opcode embeddings"),
- cl::cat(MIR2VecCategory));
+cl::opt<float> OpcWeight("mir2vec-opc-weight", cl::Optional, cl::init(1.0),
+ cl::desc("Weight for machine opcode embeddings"),
+ cl::cat(MIR2VecCategory));
+cl::opt<MIR2VecKind> MIR2VecEmbeddingKind(
+ "mir2vec-kind", cl::Optional,
+ cl::values(clEnumValN(MIR2VecKind::Symbolic, "symbolic",
+ "Generate symbolic embeddings for MIR")),
+ cl::init(MIR2VecKind::Symbolic), cl::desc("MIR2Vec embedding kind"),
+ cl::cat(MIR2VecCategory));
+
+} // namespace mir2vec
+} // namespace llvm
//===----------------------------------------------------------------------===//
-// Vocabulary Implementation
+// Vocabulary
//===----------------------------------------------------------------------===//
MIRVocabulary::MIRVocabulary(VocabMap &&OpcodeEntries,
@@ -188,6 +199,28 @@ void MIRVocabulary::buildCanonicalOpcodeMapping() {
<< " unique base opcodes\n");
}
+Expected<MIRVocabulary>
+MIRVocabulary::createDummyVocabForTest(const TargetInstrInfo &TII,
+ unsigned Dim) {
+ assert(Dim > 0 && "Dimension must be greater than zero");
+
+ float DummyVal = 0.1f;
+
+ // Create dummy embeddings for all canonical opcode names
+ VocabMap DummyVocabMap;
+ for (unsigned Opcode = 0; Opcode < TII.getNumOpcodes(); ++Opcode) {
+ std::string BaseOpcode = extractBaseOpcodeName(TII.getName(Opcode));
+ if (DummyVocabMap.count(BaseOpcode) == 0) {
+ // Only add if not already present
+ DummyVocabMap[BaseOpcode] = Embedding(Dim, DummyVal);
+ DummyVal += 0.1f;
+ }
+ }
+
+ // Create and return vocabulary with dummy embeddings
+ return MIRVocabulary::create(std::move(DummyVocabMap), TII);
+}
+
//===----------------------------------------------------------------------===//
// MIR2VecVocabLegacyAnalysis Implementation
//===----------------------------------------------------------------------===//
@@ -258,7 +291,73 @@ MIR2VecVocabLegacyAnalysis::getMIR2VecVocabulary(const Module &M) {
}
//===----------------------------------------------------------------------===//
-// Printer Passes Implementation
+// MIREmbedder and its subclasses
+//===----------------------------------------------------------------------===//
+
+std::unique_ptr<MIREmbedder> MIREmbedder::create(MIR2VecKind Mode,
+ const MachineFunction &MF,
+ const MIRVocabulary &Vocab) {
+ switch (Mode) {
+ case MIR2VecKind::Symbolic:
+ return std::make_unique<SymbolicMIREmbedder>(MF, Vocab);
+ }
+ return nullptr;
+}
+
+Embedding MIREmbedder::computeEmbeddings(const MachineBasicBlock &MBB) const {
+ Embedding MBBVector(Dimension, 0);
+
+ // Get instruction info for opcode name resolution
+ const auto &Subtarget = MF.getSubtarget();
+ const auto *TII = Subtarget.getInstrInfo();
+ if (!TII) {
+ MF.getFunction().getContext().emitError(
+ "MIR2Vec: No TargetInstrInfo available; cannot compute embeddings");
+ return MBBVector;
+ }
+
+ // Process each machine instruction in the basic block
+ for (const auto &MI : MBB) {
+ // Skip debug instructions and other metadata
+ if (MI.isDebugInstr())
+ continue;
+ MBBVector += computeEmbeddings(MI);
+ }
+
+ return MBBVector;
+}
+
+Embedding MIREmbedder::computeEmbeddings() const {
+ Embedding MFuncVector(Dimension, 0);
+
+ // Consider all reachable machine basic blocks in the function
+ for (const auto *MBB : depth_first(&MF))
+ MFuncVector += computeEmbeddings(*MBB);
+ return MFuncVector;
+}
+
+SymbolicMIREmbedder::SymbolicMIREmbedder(const MachineFunction &MF,
+ const MIRVocabulary &Vocab)
+ : MIREmbedder(MF, Vocab) {}
+
+std::unique_ptr<SymbolicMIREmbedder>
+SymbolicMIREmbedder::create(const MachineFunction &MF,
+ const MIRVocabulary &Vocab) {
+ return std::make_unique<SymbolicMIREmbedder>(MF, Vocab);
+}
+
+Embedding SymbolicMIREmbedder::computeEmbeddings(const MachineInstr &MI) const {
+ // Skip debug instructions and other metadata
+ if (MI.isDebugInstr())
+ return Embedding(Dimension, 0);
+
+ // TODO: Add operand/argument contributions
+
+ return Vocab[MI.getOpcode()];
+}
+
+//===----------------------------------------------------------------------===//
+// Printer Passes
//===----------------------------------------------------------------------===//
char MIR2VecVocabPrinterLegacyPass::ID = 0;
@@ -297,3 +396,56 @@ MachineFunctionPass *
llvm::createMIR2VecVocabPrinterLegacyPass(raw_ostream &OS) {
return new MIR2VecVocabPrinterLegacyPass(OS);
}
+
+char MIR2VecPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MIR2VecPrinterLegacyPass, "print-mir2vec",
+ "MIR2Vec Embedder Printer Pass", false, true)
+INITIALIZE_PASS_DEPENDENCY(MIR2VecVocabLegacyAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(MIR2VecPrinterLegacyPass, "print-mir2vec",
+ "MIR2Vec Embedder Printer Pass", false, true)
+
+bool MIR2VecPrinterLegacyPass::runOnMachineFunction(MachineFunction &MF) {
+ auto &Analysis = getAnalysis<MIR2VecVocabLegacyAnalysis>();
+ auto VocabOrErr =
+ Analysis.getMIR2VecVocabulary(*MF.getFunction().getParent());
+ assert(VocabOrErr && "Failed to get MIR2Vec vocabulary");
+ auto &MIRVocab = *VocabOrErr;
+
+ auto Emb = mir2vec::MIREmbedder::create(MIR2VecEmbeddingKind, MF, MIRVocab);
+ if (!Emb) {
+ OS << "Error creating MIR2Vec embeddings for function " << MF.getName()
+ << "\n";
+ return false;
+ }
+
+ OS << "MIR2Vec embeddings for machine function " << MF.getName() << ":\n";
+ OS << "Machine Function vector: ";
+ Emb->getMFunctionVector().print(OS);
+
+ OS << "Machine basic block vectors:\n";
+ for (const MachineBasicBlock &MBB : MF) {
+ OS << "Machine basic block: " << MBB.getFullName() << ":\n";
+ Emb->getMBBVector(MBB).print(OS);
+ }
+
+ OS << "Machine instruction vectors:\n";
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ // Skip debug instructions as they are not embedded
+ if (MI.isDebugInstr())
+ continue;
+
+ OS << "Machine instruction: ";
+ MI.print(OS);
+ Emb->getMInstVector(MI).print(OS);
+ }
+ }
+
+ return false;
+}
+
+MachineFunctionPass *llvm::createMIR2VecPrinterLegacyPass(raw_ostream &OS) {
+ return new MIR2VecPrinterLegacyPass(OS);
+}
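
The aggregation MIREmbedder implements above is a plain sum: each instruction embeds as a vocabulary lookup, a block as the sum over its non-debug instructions, and a function as the sum over its reachable blocks. A stripped-down sketch with illustrative names (not the MIR2Vec API):

    #include <cstddef>
    #include <vector>

    using Embedding = std::vector<double>;

    Embedding &operator+=(Embedding &A, const Embedding &B) {
      for (std::size_t I = 0; I < A.size(); ++I)
        A[I] += B[I];
      return A;
    }

    // Sum the vocabulary entries of a block's opcodes; debug
    // instructions would be filtered out before this point.
    Embedding embedBlock(const std::vector<unsigned> &Opcodes,
                         const std::vector<Embedding> &Vocab,
                         std::size_t Dim) {
      Embedding BlockVec(Dim, 0.0);
      for (unsigned Opc : Opcodes)
        BlockVec += Vocab[Opc];
      return BlockVec;
    }

    int main() {
      const std::size_t Dim = 2;
      std::vector<Embedding> Vocab = {{0.1, 0.1}, {0.2, 0.2}};
      Embedding FuncVec(Dim, 0.0);
      FuncVec += embedBlock({0, 1, 1}, Vocab, Dim); // one reachable block
      return 0;
    }
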
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index d988a2a..e37f784 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MIRFSDiscriminatorOptions.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
@@ -35,13 +36,10 @@ using namespace sampleprofutil;
// TODO(xur): Remove this option and related code once we make true as the
// default.
-namespace llvm {
-cl::opt<bool> ImprovedFSDiscriminator(
+cl::opt<bool> llvm::ImprovedFSDiscriminator(
"improved-fs-discriminator", cl::Hidden, cl::init(false),
cl::desc("New FS discriminators encoding (incompatible with the original "
"encoding)"));
-} // namespace llvm
-
char MIRAddFSDiscriminators::ID = 0;
INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 9bba50e8..d44f577 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MIRFSDiscriminatorOptions.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -62,9 +63,6 @@ static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
cl::init(false),
cl::desc("View BFI after MIR loader"));
-namespace llvm {
-extern cl::opt<bool> ImprovedFSDiscriminator;
-}
char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 7acddff..729e73c 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -932,12 +932,11 @@ void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
bool ConsiderUnseenAsDef) {
auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
- for (const auto &RPIdAndCost : Cost) {
- unsigned Class = RPIdAndCost.first;
- if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
+ for (const auto &[Class, Weight] : Cost) {
+ if (static_cast<int>(RegPressure[Class]) < -Weight)
RegPressure[Class] = 0;
else
- RegPressure[Class] += RPIdAndCost.second;
+ RegPressure[Class] += Weight;
}
}
@@ -1215,11 +1214,10 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
/// given cost matrix can cause high register pressure.
bool MachineLICMImpl::CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {
- for (const auto &RPIdAndCost : Cost) {
- if (RPIdAndCost.second <= 0)
+ for (const auto &[Class, Weight] : Cost) {
+ if (Weight <= 0)
continue;
- unsigned Class = RPIdAndCost.first;
int Limit = RegLimit[Class];
// Don't hoist cheap instructions if they would increase register pressure,
@@ -1228,7 +1226,7 @@ bool MachineLICMImpl::CanCauseHighRegPressure(
return true;
for (const auto &RP : BackTrace)
- if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
+ if (static_cast<int>(RP[Class]) + Weight >= Limit)
return true;
}
@@ -1246,8 +1244,8 @@ void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
// Update register pressure of blocks from loop header to current block.
for (auto &RP : BackTrace)
- for (const auto &RPIdAndCost : Cost)
- RP[RPIdAndCost.first] += RPIdAndCost.second;
+ for (const auto &[Class, Weight] : Cost)
+ RP[Class] += Weight;
}
/// Return true if it is potentially profitable to hoist the given loop
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 804480c..72b364c 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -211,7 +211,7 @@ private:
unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }
};
- using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
+ using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>;
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008..310d35d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16433,7 +16433,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
case ISD::OR:
case ISD::XOR:
if (!LegalOperations && N0.hasOneUse() &&
- (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ (N0.getOperand(0) == N0.getOperand(1) ||
+ isConstantOrConstantVector(N0.getOperand(0), true) ||
isConstantOrConstantVector(N0.getOperand(1), true))) {
// TODO: We already restricted this to pre-legalization, but for vectors
// we are extra cautious to not create an unsupported operation.
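
The new N0.getOperand(0) == N0.getOperand(1) case rests on the same algebraic fact as the existing constant-operand cases: truncation distributes over these bitwise and modular-arithmetic ops, so the binop can be narrowed first. A quick standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 0x123456789ABCDEF0ULL, Y = 0x0F0FF0F012344321ULL;
      // trunc(x & y) == trunc(x) & trunc(y)
      assert(static_cast<uint32_t>(X & Y) ==
             (static_cast<uint32_t>(X) & static_cast<uint32_t>(Y)));
      // trunc(x ^ x) == trunc(x) ^ trunc(x) (the identical-operand case)
      assert(static_cast<uint32_t>(X ^ X) ==
             (static_cast<uint32_t>(X) ^ static_cast<uint32_t>(X)));
      return 0;
    }
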
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 437d0f4..bf1abfe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3765,6 +3765,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::LRINT:
case ISD::LLRINT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
Res = SoftPromoteHalfOp_Op0WithStrict(N);
break;
case ISD::FP_TO_SINT_SAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 88a4a8b..b1776ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -429,7 +429,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
- SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op2 = N->getOperand(2);
+ switch (TLI.getExtendForAtomicRMWArg(N->getOpcode())) {
+ case ISD::SIGN_EXTEND:
+ Op2 = SExtPromotedInteger(Op2);
+ break;
+ case ISD::ZERO_EXTEND:
+ Op2 = ZExtPromotedInteger(Op2);
+ break;
+ case ISD::ANY_EXTEND:
+ Op2 = GetPromotedInteger(Op2);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
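
Which extension is correct depends on the atomic opcode, which is what TLI.getExtendForAtomicRMWArg reports: for example, a signed min/max only survives promotion if its operand is sign-extended, since the comparison in the wider type must agree with the narrow one. An illustrative check of that rationale (plain C++, not the legalizer):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t A = -5, B = 3;
      // Sign-extended promotions: the widened min matches the narrow one.
      int32_t SExtA = A, SExtB = B;
      assert(static_cast<int8_t>(std::min(SExtA, SExtB)) == std::min(A, B));
      // Zero-extending instead computes min(251, 3) == 3, not -5: wrong.
      int32_t ZExtA = static_cast<uint8_t>(A);
      assert(std::min(ZExtA, SExtB) != SExtA);
      return 0;
    }
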
diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 5eb86e7..049efe8 100644
--- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -51,7 +51,7 @@ bool TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const {
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ return HonorSignDependentRoundingFPMathOption;
}
/// NOTE: There are targets that still do not support the debug entry values
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 212a0c0..db5cc37 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -107,6 +107,28 @@ static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) {
return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference();
}
+static llvm::StringRef
+prettyLanguageVersionString(const DWARFAttribute &AttrValue,
+ const DWARFDie &Die) {
+ if (AttrValue.Attr != DW_AT_language_version)
+ return {};
+
+ auto NameForm = Die.find(DW_AT_language_name);
+ if (!NameForm)
+ return {};
+
+ auto LName = NameForm->getAsUnsignedConstant();
+ if (!LName)
+ return {};
+
+ auto LVersion = AttrValue.Value.getAsUnsignedConstant();
+ if (!LVersion)
+ return {};
+
+ return llvm::dwarf::LanguageDescription(
+ static_cast<SourceLanguageName>(*LName), *LVersion);
+}
+
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFAttribute &AttrValue, unsigned Indent,
DIDumpOptions DumpOpts) {
@@ -146,15 +168,28 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
} else if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
Name = AttributeValueString(Attr, *Val);
- if (!Name.empty())
- WithColor(OS, Color) << Name;
- else if (Attr == DW_AT_decl_line || Attr == DW_AT_decl_column ||
- Attr == DW_AT_call_line || Attr == DW_AT_call_column ||
- Attr == DW_AT_language_version) {
+ auto DumpUnsignedConstant = [&OS,
+ &DumpOpts](const DWARFFormValue &FormValue) {
if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
OS << *Val;
else
FormValue.dump(OS, DumpOpts);
+ };
+
+ llvm::StringRef PrettyVersionName =
+ prettyLanguageVersionString(AttrValue, Die);
+ bool ShouldDumpRawLanguageVersion =
+ Attr == DW_AT_language_version &&
+ (DumpOpts.Verbose || PrettyVersionName.empty());
+
+ if (!Name.empty())
+ WithColor(OS, Color) << Name;
+ else if (Attr == DW_AT_decl_line || Attr == DW_AT_decl_column ||
+ Attr == DW_AT_call_line || Attr == DW_AT_call_column) {
+ DumpUnsignedConstant(FormValue);
+ } else if (Attr == DW_AT_language_version) {
+ if (ShouldDumpRawLanguageVersion)
+ DumpUnsignedConstant(FormValue);
} else if (Attr == DW_AT_low_pc &&
(FormValue.getAsAddress() ==
dwarf::computeTombstoneAddress(U->getAddressByteSize()))) {
@@ -226,6 +261,10 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
DumpOpts.RecoverableErrorHandler(createStringError(
errc::invalid_argument, "decoding address ranges: %s",
toString(RangesOrError.takeError()).c_str()));
+ } else if (Attr == DW_AT_language_version) {
+ if (!PrettyVersionName.empty())
+ WithColor(OS, Color) << (ShouldDumpRawLanguageVersion ? " " : "")
+ << PrettyVersionName;
}
OS << ")\n";
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index f47b7ec..8d413a3 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -1173,39 +1173,7 @@ void JITDylib::dump(raw_ostream &OS) {
<< " pending queries: { ";
for (const auto &Q : KV.second.pendingQueries())
OS << Q.get() << " (" << Q->getRequiredState() << ") ";
- OS << "}\n Defining EDU: ";
- if (KV.second.DefiningEDU) {
- OS << KV.second.DefiningEDU.get() << " { ";
- for (auto &[Name, Flags] : KV.second.DefiningEDU->Symbols)
- OS << Name << " ";
- OS << "}\n";
- OS << " Dependencies:\n";
- if (!KV.second.DefiningEDU->Dependencies.empty()) {
- for (auto &[DepJD, Deps] : KV.second.DefiningEDU->Dependencies) {
- OS << " " << DepJD->getName() << ": [ ";
- for (auto &Dep : Deps)
- OS << Dep << " ";
- OS << "]\n";
- }
- } else
- OS << " none\n";
- } else
- OS << "none\n";
- OS << " Dependant EDUs:\n";
- if (!KV.second.DependantEDUs.empty()) {
- for (auto &DependantEDU : KV.second.DependantEDUs) {
- OS << " " << DependantEDU << ": "
- << DependantEDU->JD->getName() << " { ";
- for (auto &[Name, Flags] : DependantEDU->Symbols)
- OS << Name << " ";
- OS << "}\n";
- }
- } else
- OS << " none\n";
- assert((Symbols[KV.first].getState() != SymbolState::Ready ||
- (KV.second.pendingQueries().empty() && !KV.second.DefiningEDU &&
- !KV.second.DependantEDUs.empty())) &&
- "Stale materializing info entry");
+ OS << "}\n";
}
});
}
@@ -1967,9 +1935,6 @@ bool ExecutionSession::verifySessionState(Twine Phase) {
return runSessionLocked([&]() {
bool AllOk = true;
- // We'll collect these and verify them later to avoid redundant checks.
- DenseSet<JITDylib::EmissionDepUnit *> EDUsToCheck;
-
for (auto &JD : JDs) {
auto LogFailure = [&]() -> raw_fd_ostream & {
@@ -2063,86 +2028,6 @@ bool ExecutionSession::verifySessionState(Twine Phase) {
<< " has stale or misordered queries.\n";
}
}
-
- // If there's a DefiningEDU then check that...
- // 1. The JD matches.
- // 2. The symbol is in the EDU's Symbols map.
- // 3. The symbol table entry is in the Emitted state.
- if (MII.DefiningEDU) {
-
- EDUsToCheck.insert(MII.DefiningEDU.get());
-
- if (MII.DefiningEDU->JD != JD.get()) {
- LogFailure() << "symbol " << Sym
- << " has DefiningEDU with incorrect JD"
- << (llvm::is_contained(JDs, MII.DefiningEDU->JD)
- ? " (JD not currently in ExecutionSession"
- : "")
- << "\n";
- }
-
- if (SymItr->second.getState() != SymbolState::Emitted) {
- LogFailure()
- << "symbol " << Sym
- << " has DefiningEDU, but is not in Emitted state.\n";
- }
- }
-
- // Check that JDs for any DependantEDUs are also in the session --
- // that guarantees that we'll also visit them during this loop.
- for (auto &DepEDU : MII.DependantEDUs) {
- if (!llvm::is_contained(JDs, DepEDU->JD)) {
- LogFailure() << "symbol " << Sym << " has DependantEDU "
- << (void *)DepEDU << " with JD (" << DepEDU->JD
- << ") that isn't in ExecutionSession.\n";
- }
- }
- }
- }
- }
-
- // Check EDUs.
- for (auto *EDU : EDUsToCheck) {
- assert(EDU->JD->State == JITDylib::Open && "EDU->JD is not Open");
-
- auto LogFailure = [&]() -> raw_fd_ostream & {
- AllOk = false;
- auto &Stream = errs();
- Stream << "In EDU defining " << EDU->JD->getName() << ": { ";
- for (auto &[Sym, Flags] : EDU->Symbols)
- Stream << Sym << " ";
- Stream << "}, ";
- return Stream;
- };
-
- if (EDU->Symbols.empty())
- LogFailure() << "no symbols defined.\n";
- else {
- for (auto &[Sym, Flags] : EDU->Symbols) {
- if (!Sym)
- LogFailure() << "null symbol defined.\n";
- else {
- if (!EDU->JD->Symbols.count(SymbolStringPtr(Sym))) {
- LogFailure() << "symbol " << Sym
- << " isn't present in JD's symbol table.\n";
- }
- }
- }
- }
-
- for (auto &[DepJD, Symbols] : EDU->Dependencies) {
- if (!llvm::is_contained(JDs, DepJD)) {
- LogFailure() << "dependant symbols listed for JD that isn't in "
- "ExecutionSession.\n";
- } else {
- for (auto &DepSym : Symbols) {
- if (!DepJD->Symbols.count(SymbolStringPtr(DepSym))) {
- LogFailure()
- << "dependant symbol " << DepSym
- << " does not appear in symbol table for dependant JD "
- << DepJD->getName() << ".\n";
- }
- }
}
}
}
@@ -2917,359 +2802,64 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
return MR.JD.resolve(MR, Symbols);
}
-template <typename HandleNewDepFn>
-void ExecutionSession::propagateExtraEmitDeps(
- std::deque<JITDylib::EmissionDepUnit *> Worklist, EDUInfosMap &EDUInfos,
- HandleNewDepFn HandleNewDep) {
-
- // Iterate to a fixed-point to propagate extra-emit dependencies through the
- // EDU graph.
- while (!Worklist.empty()) {
- auto &EDU = *Worklist.front();
- Worklist.pop_front();
-
- assert(EDUInfos.count(&EDU) && "No info entry for EDU");
- auto &EDUInfo = EDUInfos[&EDU];
-
- // Propagate new dependencies to users.
- for (auto *UserEDU : EDUInfo.IntraEmitUsers) {
-
- // UserEDUInfo only present if UserEDU has its own users.
- JITDylib::EmissionDepUnitInfo *UserEDUInfo = nullptr;
- {
- auto UserEDUInfoItr = EDUInfos.find(UserEDU);
- if (UserEDUInfoItr != EDUInfos.end())
- UserEDUInfo = &UserEDUInfoItr->second;
- }
-
- for (auto &[DepJD, Deps] : EDUInfo.NewDeps) {
- auto &UserEDUDepsForJD = UserEDU->Dependencies[DepJD];
- DenseSet<NonOwningSymbolStringPtr> *UserEDUNewDepsForJD = nullptr;
- for (auto Dep : Deps) {
- if (UserEDUDepsForJD.insert(Dep).second) {
- HandleNewDep(*UserEDU, *DepJD, Dep);
- if (UserEDUInfo) {
- if (!UserEDUNewDepsForJD) {
- // If UserEDU has no new deps then it's not in the worklist
- // yet, so add it.
- if (UserEDUInfo->NewDeps.empty())
- Worklist.push_back(UserEDU);
- UserEDUNewDepsForJD = &UserEDUInfo->NewDeps[DepJD];
- }
- // Add (DepJD, Dep) to NewDeps.
- UserEDUNewDepsForJD->insert(Dep);
- }
- }
+WaitingOnGraph::ExternalState
+ExecutionSession::IL_getSymbolState(JITDylib *JD,
+ NonOwningSymbolStringPtr Name) {
+ if (JD->State != JITDylib::Open)
+ return WaitingOnGraph::ExternalState::Failed;
+
+ auto I = JD->Symbols.find_as(Name);
+
+ // FIXME: Can we eliminate this possibility if we support query binding?
+ if (I == JD->Symbols.end())
+ return WaitingOnGraph::ExternalState::Failed;
+
+ if (I->second.getFlags().hasError())
+ return WaitingOnGraph::ExternalState::Failed;
+
+ if (I->second.getState() == SymbolState::Ready)
+ return WaitingOnGraph::ExternalState::Ready;
+
+ return WaitingOnGraph::ExternalState::None;
+}
+
+template <typename UpdateSymbolFn, typename UpdateQueryFn>
+void ExecutionSession::IL_collectQueries(
+ JITDylib::AsynchronousSymbolQuerySet &Qs,
+ WaitingOnGraph::ContainerElementsMap &QualifiedSymbols,
+ UpdateSymbolFn &&UpdateSymbol, UpdateQueryFn &&UpdateQuery) {
+
+ for (auto &[JD, Symbols] : QualifiedSymbols) {
+ // IL_emit and JITDylib removal are synchronized by the session lock.
+ // Since JITDylib removal removes any contained nodes from the
+ // WaitingOnGraph, every JITDylib still referenced by the graph must
+ // still be open, which we assert below.
+ assert(JD->State == JITDylib::Open &&
+ "WaitingOnGraph includes definition in defunct JITDylib");
+ for (auto &Symbol : Symbols) {
+ // Update symbol table.
+ auto I = JD->Symbols.find_as(Symbol);
+ assert(I != JD->Symbols.end() &&
+ "Failed Symbol missing from JD symbol table");
+ auto &Entry = I->second;
+ UpdateSymbol(Entry);
+
+ // Collect queries.
+ auto J = JD->MaterializingInfos.find_as(Symbol);
+ if (J != JD->MaterializingInfos.end()) {
+ for (auto &Q : J->second.takeAllPendingQueries()) {
+ UpdateQuery(*Q, *JD, Symbol, Entry);
+ Qs.insert(std::move(Q));
}
+ JD->MaterializingInfos.erase(J);
}
}
-
- EDUInfo.NewDeps.clear();
- }
-}
-
-// Note: This method modifies the emitted set.
-ExecutionSession::EDUInfosMap ExecutionSession::simplifyDepGroups(
- MaterializationResponsibility &MR,
- ArrayRef<SymbolDependenceGroup> EmittedDeps) {
-
- auto &TargetJD = MR.getTargetJITDylib();
-
- // 1. Build initial EmissionDepUnit -> EmissionDepUnitInfo and
- // Symbol -> EmissionDepUnit mappings.
- DenseMap<JITDylib::EmissionDepUnit *, JITDylib::EmissionDepUnitInfo> EDUInfos;
- EDUInfos.reserve(EmittedDeps.size());
- DenseMap<NonOwningSymbolStringPtr, JITDylib::EmissionDepUnit *> EDUForSymbol;
- for (auto &DG : EmittedDeps) {
- assert(!DG.Symbols.empty() && "DepGroup does not cover any symbols");
-
- // Skip empty EDUs.
- if (DG.Dependencies.empty())
- continue;
-
- auto TmpEDU = std::make_shared<JITDylib::EmissionDepUnit>(TargetJD);
- auto &EDUInfo = EDUInfos[TmpEDU.get()];
- EDUInfo.EDU = std::move(TmpEDU);
- for (const auto &Symbol : DG.Symbols) {
- NonOwningSymbolStringPtr NonOwningSymbol(Symbol);
- assert(!EDUForSymbol.count(NonOwningSymbol) &&
- "Symbol should not appear in more than one SymbolDependenceGroup");
- assert(MR.getSymbols().count(Symbol) &&
- "Symbol in DepGroups not in the emitted set");
- auto NewlyEmittedItr = MR.getSymbols().find(Symbol);
- EDUInfo.EDU->Symbols[NonOwningSymbol] = NewlyEmittedItr->second;
- EDUForSymbol[NonOwningSymbol] = EDUInfo.EDU.get();
- }
- }
-
- // 2. Build a "residual" EDU to cover all symbols that have no dependencies.
- {
- DenseMap<NonOwningSymbolStringPtr, JITSymbolFlags> ResidualSymbolFlags;
- for (auto &[Sym, Flags] : MR.getSymbols()) {
- if (!EDUForSymbol.count(NonOwningSymbolStringPtr(Sym)))
- ResidualSymbolFlags[NonOwningSymbolStringPtr(Sym)] = Flags;
- }
- if (!ResidualSymbolFlags.empty()) {
- auto ResidualEDU = std::make_shared<JITDylib::EmissionDepUnit>(TargetJD);
- ResidualEDU->Symbols = std::move(ResidualSymbolFlags);
- auto &ResidualEDUInfo = EDUInfos[ResidualEDU.get()];
- ResidualEDUInfo.EDU = std::move(ResidualEDU);
-
- // If the residual EDU is the only one then bail out early.
- if (EDUInfos.size() == 1)
- return EDUInfos;
-
- // Otherwise add the residual EDU to the EDUForSymbol map.
- for (auto &[Sym, Flags] : ResidualEDUInfo.EDU->Symbols)
- EDUForSymbol[Sym] = ResidualEDUInfo.EDU.get();
- }
- }
-
-#ifndef NDEBUG
- assert(EDUForSymbol.size() == MR.getSymbols().size() &&
- "MR symbols not fully covered by EDUs?");
- for (auto &[Sym, Flags] : MR.getSymbols()) {
- assert(EDUForSymbol.count(NonOwningSymbolStringPtr(Sym)) &&
- "Sym in MR not covered by EDU");
- }
-#endif // NDEBUG
-
- // 3. Use the DepGroups array to build a graph of dependencies between
- // EmissionDepUnits in this finalization. We want to remove these
- // intra-finalization uses, propagating dependencies on symbols outside
- // this finalization. Add EDUs to the worklist.
- for (auto &DG : EmittedDeps) {
-
- // Skip SymbolDependenceGroups with no dependencies.
- if (DG.Dependencies.empty())
- continue;
-
- assert(EDUForSymbol.count(NonOwningSymbolStringPtr(*DG.Symbols.begin())) &&
- "No EDU for DG");
- auto &EDU =
- *EDUForSymbol.find(NonOwningSymbolStringPtr(*DG.Symbols.begin()))
- ->second;
-
- for (auto &[DepJD, Deps] : DG.Dependencies) {
- DenseSet<NonOwningSymbolStringPtr> NewDepsForJD;
-
- assert(!Deps.empty() && "Dependence set for DepJD is empty");
-
- if (DepJD != &TargetJD) {
- // DepJD is some other JITDylib. There can't be any intra-finalization
- // edges here, so just skip.
- for (auto &Dep : Deps)
- NewDepsForJD.insert(NonOwningSymbolStringPtr(Dep));
- } else {
- // DepJD is the Target JITDylib. Check for intra-finalization edges,
- // skipping any and recording the intra-finalization use instead.
- for (auto &Dep : Deps) {
- NonOwningSymbolStringPtr NonOwningDep(Dep);
- auto I = EDUForSymbol.find(NonOwningDep);
- if (I == EDUForSymbol.end()) {
- if (!MR.getSymbols().count(Dep))
- NewDepsForJD.insert(NonOwningDep);
- continue;
- }
-
- if (I->second != &EDU)
- EDUInfos[I->second].IntraEmitUsers.insert(&EDU);
- }
- }
-
- if (!NewDepsForJD.empty())
- EDU.Dependencies[DepJD] = std::move(NewDepsForJD);
- }
- }
-
- // 4. Build the worklist.
- std::deque<JITDylib::EmissionDepUnit *> Worklist;
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- // If this EDU has extra-finalization dependencies and intra-finalization
- // users then add it to the worklist.
- if (!EDU->Dependencies.empty()) {
- auto I = EDUInfos.find(EDU);
- if (I != EDUInfos.end()) {
- auto &EDUInfo = I->second;
- if (!EDUInfo.IntraEmitUsers.empty()) {
- EDUInfo.NewDeps = EDU->Dependencies;
- Worklist.push_back(EDU);
- }
- }
- }
- }
-
- // 4. Propagate dependencies through the EDU graph.
- propagateExtraEmitDeps(
- Worklist, EDUInfos,
- [](JITDylib::EmissionDepUnit &, JITDylib &, NonOwningSymbolStringPtr) {});
-
- return EDUInfos;
-}
-
-void ExecutionSession::IL_makeEDUReady(
- std::shared_ptr<JITDylib::EmissionDepUnit> EDU,
- JITDylib::AsynchronousSymbolQuerySet &Queries) {
-
- // The symbols for this EDU are ready.
- auto &JD = *EDU->JD;
-
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(JD.Symbols.count(SymbolStringPtr(Sym)) &&
- "JD does not have an entry for Sym");
- auto &Entry = JD.Symbols[SymbolStringPtr(Sym)];
-
- assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() &&
- Entry.getState() == SymbolState::Materializing) ||
- Entry.getState() == SymbolState::Resolved ||
- Entry.getState() == SymbolState::Emitted) &&
- "Emitting from state other than Resolved");
-
- Entry.setState(SymbolState::Ready);
-
- auto MII = JD.MaterializingInfos.find(SymbolStringPtr(Sym));
-
- // Check for pending queries.
- if (MII == JD.MaterializingInfos.end())
- continue;
- auto &MI = MII->second;
-
- for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
- Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol());
- if (Q->isComplete())
- Queries.insert(Q);
- Q->removeQueryDependence(JD, SymbolStringPtr(Sym));
- }
-
- JD.MaterializingInfos.erase(MII);
- }
-
- JD.shrinkMaterializationInfoMemory();
-}
-
-void ExecutionSession::IL_makeEDUEmitted(
- std::shared_ptr<JITDylib::EmissionDepUnit> EDU,
- JITDylib::AsynchronousSymbolQuerySet &Queries) {
-
- // The symbols for this EDU are emitted, but not ready.
- auto &JD = *EDU->JD;
-
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(JD.Symbols.count(SymbolStringPtr(Sym)) &&
- "JD does not have an entry for Sym");
- auto &Entry = JD.Symbols[SymbolStringPtr(Sym)];
-
- assert(((Entry.getFlags().hasMaterializationSideEffectsOnly() &&
- Entry.getState() == SymbolState::Materializing) ||
- Entry.getState() == SymbolState::Resolved ||
- Entry.getState() == SymbolState::Emitted) &&
- "Emitting from state other than Resolved");
-
- if (Entry.getState() == SymbolState::Emitted) {
- // This was already emitted, so we can skip the rest of this loop.
-#ifndef NDEBUG
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(JD.Symbols.count(SymbolStringPtr(Sym)) &&
- "JD does not have an entry for Sym");
- auto &Entry = JD.Symbols[SymbolStringPtr(Sym)];
- assert(Entry.getState() == SymbolState::Emitted &&
- "Symbols for EDU in inconsistent state");
- assert(JD.MaterializingInfos.count(SymbolStringPtr(Sym)) &&
- "Emitted symbol has no MI");
- auto MI = JD.MaterializingInfos[SymbolStringPtr(Sym)];
- assert(MI.takeQueriesMeeting(SymbolState::Emitted).empty() &&
- "Already-emitted symbol has waiting-on-emitted queries");
- }
-#endif // NDEBUG
- break;
- }
-
- Entry.setState(SymbolState::Emitted);
- auto &MI = JD.MaterializingInfos[SymbolStringPtr(Sym)];
- MI.DefiningEDU = EDU;
-
- for (auto &Q : MI.takeQueriesMeeting(SymbolState::Emitted)) {
- Q->notifySymbolMetRequiredState(SymbolStringPtr(Sym), Entry.getSymbol());
- if (Q->isComplete())
- Queries.insert(Q);
- }
}
-
- for (auto &[DepJD, Deps] : EDU->Dependencies) {
- for (auto &Dep : Deps)
- DepJD->MaterializingInfos[SymbolStringPtr(Dep)].DependantEDUs.insert(
- EDU.get());
- }
-}
-
-/// Removes the given dependence from EDU. If EDU's dependence set becomes
-/// empty then this function adds an entry for it to the EDUInfos map.
-/// Returns true if a new EDUInfosMap entry is added.
-bool ExecutionSession::IL_removeEDUDependence(JITDylib::EmissionDepUnit &EDU,
- JITDylib &DepJD,
- NonOwningSymbolStringPtr DepSym,
- EDUInfosMap &EDUInfos) {
- assert(EDU.Dependencies.count(&DepJD) &&
- "JD does not appear in Dependencies of DependantEDU");
- assert(EDU.Dependencies[&DepJD].count(DepSym) &&
- "Symbol does not appear in Dependencies of DependantEDU");
- auto &JDDeps = EDU.Dependencies[&DepJD];
- JDDeps.erase(DepSym);
- if (JDDeps.empty()) {
- EDU.Dependencies.erase(&DepJD);
- if (EDU.Dependencies.empty()) {
- // If the dependencies set has become empty then EDU _may_ be ready
- // (we won't know for sure until we've propagated the extra-emit deps).
- // Create an EDUInfo for it (if it doesn't have one already) so that
- // it'll be visited after propagation.
- auto &DepEDUInfo = EDUInfos[&EDU];
- if (!DepEDUInfo.EDU) {
- assert(EDU.JD->Symbols.count(
- SymbolStringPtr(EDU.Symbols.begin()->first)) &&
- "Missing symbol entry for first symbol in EDU");
- auto DepEDUFirstMI = EDU.JD->MaterializingInfos.find(
- SymbolStringPtr(EDU.Symbols.begin()->first));
- assert(DepEDUFirstMI != EDU.JD->MaterializingInfos.end() &&
- "Missing MI for first symbol in DependantEDU");
- DepEDUInfo.EDU = DepEDUFirstMI->second.DefiningEDU;
- return true;
- }
- }
- }
- return false;
}
-Error ExecutionSession::makeJDClosedError(JITDylib::EmissionDepUnit &EDU,
- JITDylib &ClosedJD) {
- SymbolNameSet FailedSymbols;
- for (auto &[Sym, Flags] : EDU.Symbols)
- FailedSymbols.insert(SymbolStringPtr(Sym));
- SymbolDependenceMap BadDeps;
- for (auto &Dep : EDU.Dependencies[&ClosedJD])
- BadDeps[&ClosedJD].insert(SymbolStringPtr(Dep));
- return make_error<UnsatisfiedSymbolDependencies>(
- ClosedJD.getExecutionSession().getSymbolStringPool(), EDU.JD,
- std::move(FailedSymbols), std::move(BadDeps),
- ClosedJD.getName() + " is closed");
-}
-
-Error ExecutionSession::makeUnsatisfiedDepsError(JITDylib::EmissionDepUnit &EDU,
- JITDylib &BadJD,
- SymbolNameSet BadDeps) {
- SymbolNameSet FailedSymbols;
- for (auto &[Sym, Flags] : EDU.Symbols)
- FailedSymbols.insert(SymbolStringPtr(Sym));
- SymbolDependenceMap BadDepsMap;
- BadDepsMap[&BadJD] = std::move(BadDeps);
- return make_error<UnsatisfiedSymbolDependencies>(
- BadJD.getExecutionSession().getSymbolStringPool(), &BadJD,
- std::move(FailedSymbols), std::move(BadDepsMap),
- "dependencies removed or in error state");
-}
-
-Expected<JITDylib::AsynchronousSymbolQuerySet>
+Expected<ExecutionSession::EmitQueries>
ExecutionSession::IL_emit(MaterializationResponsibility &MR,
- EDUInfosMap EDUInfos) {
+ WaitingOnGraph::SimplifyResult SR) {
if (MR.RT->isDefunct())
return make_error<ResourceTrackerDefunct>(MR.RT);
@@ -3279,169 +2869,50 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR,
return make_error<StringError>("JITDylib " + TargetJD.getName() +
" is defunct",
inconvertibleErrorCode());
+
#ifdef EXPENSIVE_CHECKS
verifySessionState("entering ExecutionSession::IL_emit");
#endif
- // Walk all EDUs:
- // 1. Verifying that dependencies are available (not removed or in the error
- // state).
- // 2. Removing any dependencies that are already Ready.
- // 3. Lifting any EDUs for Emitted symbols into the EDUInfos map.
- // 4. Finding any dependant EDUs and lifting them into the EDUInfos map.
- std::deque<JITDylib::EmissionDepUnit *> Worklist;
- for (auto &[EDU, _] : EDUInfos)
- Worklist.push_back(EDU);
-
- for (auto *EDU : Worklist) {
- auto *EDUInfo = &EDUInfos[EDU];
-
- SmallVector<JITDylib *> DepJDsToRemove;
- for (auto &[DepJD, Deps] : EDU->Dependencies) {
- if (DepJD->State != JITDylib::Open)
- return makeJDClosedError(*EDU, *DepJD);
-
- SymbolNameSet BadDeps;
- SmallVector<NonOwningSymbolStringPtr> DepsToRemove;
- for (auto &Dep : Deps) {
- auto DepEntryItr = DepJD->Symbols.find(SymbolStringPtr(Dep));
-
- // If this dep has been removed or moved to the error state then add it
- // to the bad deps set. We aggregate these bad deps for more
- // comprehensive error messages.
- if (DepEntryItr == DepJD->Symbols.end() ||
- DepEntryItr->second.getFlags().hasError()) {
- BadDeps.insert(SymbolStringPtr(Dep));
- continue;
- }
-
- // If this dep isn't emitted yet then just add it to the NewDeps set to
- // be propagated.
- auto &DepEntry = DepEntryItr->second;
- if (DepEntry.getState() < SymbolState::Emitted) {
- EDUInfo->NewDeps[DepJD].insert(Dep);
- continue;
- }
-
- // This dep has been emitted, so add it to the list to be removed from
- // EDU.
- DepsToRemove.push_back(Dep);
-
- // If Dep is Ready then there's nothing further to do.
- if (DepEntry.getState() == SymbolState::Ready) {
- assert(!DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) &&
- "Unexpected MaterializationInfo attached to ready symbol");
- continue;
- }
+ auto ER = G.emit(std::move(SR),
+ [this](JITDylib *JD, NonOwningSymbolStringPtr Name) {
+ return IL_getSymbolState(JD, Name);
+ });
- // If we get here then Dep is Emitted. We need to look up its defining
- // EDU and add this EDU to the defining EDU's list of users (this means
- // creating an EDUInfos entry if the defining EDU doesn't have one
- // already).
- assert(DepJD->MaterializingInfos.count(SymbolStringPtr(Dep)) &&
- "Expected MaterializationInfo for emitted dependency");
- auto &DepMI = DepJD->MaterializingInfos[SymbolStringPtr(Dep)];
- assert(DepMI.DefiningEDU &&
- "Emitted symbol does not have a defining EDU");
- assert(DepMI.DependantEDUs.empty() &&
- "Already-emitted symbol has dependant EDUs?");
- auto &DepEDUInfo = EDUInfos[DepMI.DefiningEDU.get()];
- if (!DepEDUInfo.EDU) {
- // No EDUInfo yet -- build initial entry, and reset the EDUInfo
- // pointer, which we will have invalidated.
- EDUInfo = &EDUInfos[EDU];
- DepEDUInfo.EDU = DepMI.DefiningEDU;
- for (auto &[DepDepJD, DepDeps] : DepEDUInfo.EDU->Dependencies) {
- if (DepDepJD == &TargetJD) {
- for (auto &DepDep : DepDeps)
- if (!MR.getSymbols().count(SymbolStringPtr(DepDep)))
- DepEDUInfo.NewDeps[DepDepJD].insert(DepDep);
- } else
- DepEDUInfo.NewDeps[DepDepJD] = DepDeps;
- }
- }
- DepEDUInfo.IntraEmitUsers.insert(EDU);
- }
-
- // Some dependencies were removed or in an error state -- error out.
- if (!BadDeps.empty())
- return makeUnsatisfiedDepsError(*EDU, *DepJD, std::move(BadDeps));
-
- // Remove the emitted / ready deps from DepJD.
- for (auto &Dep : DepsToRemove)
- Deps.erase(Dep);
-
- // If there are no further deps in DepJD then flag it for removal too.
- if (Deps.empty())
- DepJDsToRemove.push_back(DepJD);
- }
+ EmitQueries EQ;
- // Remove any JDs whose dependence sets have become empty.
- for (auto &DepJD : DepJDsToRemove) {
- assert(EDU->Dependencies.count(DepJD) &&
- "Trying to remove non-existent dep entries");
- EDU->Dependencies.erase(DepJD);
- }
-
- // Now look for users of this EDU.
- for (auto &[Sym, Flags] : EDU->Symbols) {
- assert(TargetJD.Symbols.count(SymbolStringPtr(Sym)) &&
- "Sym not present in symbol table");
- assert((TargetJD.Symbols[SymbolStringPtr(Sym)].getState() ==
- SymbolState::Resolved ||
- TargetJD.Symbols[SymbolStringPtr(Sym)]
- .getFlags()
- .hasMaterializationSideEffectsOnly()) &&
- "Emitting symbol not in the resolved state");
- assert(!TargetJD.Symbols[SymbolStringPtr(Sym)].getFlags().hasError() &&
- "Symbol is already in an error state");
-
- auto MII = TargetJD.MaterializingInfos.find(SymbolStringPtr(Sym));
- if (MII == TargetJD.MaterializingInfos.end() ||
- MII->second.DependantEDUs.empty())
- continue;
-
- for (auto &DependantEDU : MII->second.DependantEDUs) {
- if (IL_removeEDUDependence(*DependantEDU, TargetJD, Sym, EDUInfos))
- EDUInfo = &EDUInfos[EDU];
- EDUInfo->IntraEmitUsers.insert(DependantEDU);
- }
- MII->second.DependantEDUs.clear();
- }
- }
-
- Worklist.clear();
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- if (!EDUInfo.IntraEmitUsers.empty() && !EDU->Dependencies.empty()) {
- if (EDUInfo.NewDeps.empty())
- EDUInfo.NewDeps = EDU->Dependencies;
- Worklist.push_back(EDU);
- }
- }
-
- propagateExtraEmitDeps(
- Worklist, EDUInfos,
- [](JITDylib::EmissionDepUnit &EDU, JITDylib &JD,
- NonOwningSymbolStringPtr Sym) {
- JD.MaterializingInfos[SymbolStringPtr(Sym)].DependantEDUs.insert(&EDU);
- });
+ // Handle failed queries.
+ for (auto &SN : ER.Failed)
+ IL_collectQueries(
+ EQ.Failed, SN->defs(),
+ [](JITDylib::SymbolTableEntry &E) {
+ E.setFlags(E.getFlags() | JITSymbolFlags::HasError);
+ },
+ [&](AsynchronousSymbolQuery &Q, JITDylib &JD,
+ NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) {
+ auto &FS = EQ.FailedSymsForQuery[&Q];
+ if (!FS)
+ FS = std::make_shared<SymbolDependenceMap>();
+ (*FS)[&JD].insert(SymbolStringPtr(Name));
+ });
- JITDylib::AsynchronousSymbolQuerySet CompletedQueries;
+ for (auto &FQ : EQ.Failed)
+ FQ->detach();
- // Extract completed queries and lodge not-yet-ready EDUs in the
- // session.
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- if (EDU->Dependencies.empty())
- IL_makeEDUReady(std::move(EDUInfo.EDU), CompletedQueries);
- else
- IL_makeEDUEmitted(std::move(EDUInfo.EDU), CompletedQueries);
- }
+ for (auto &SN : ER.Ready)
+ IL_collectQueries(
+ EQ.Updated, SN->defs(),
+ [](JITDylib::SymbolTableEntry &E) { E.setState(SymbolState::Ready); },
+ [](AsynchronousSymbolQuery &Q, JITDylib &JD,
+ NonOwningSymbolStringPtr Name, JITDylib::SymbolTableEntry &E) {
+ Q.notifySymbolMetRequiredState(SymbolStringPtr(Name), E.getSymbol());
+ });
#ifdef EXPENSIVE_CHECKS
verifySessionState("exiting ExecutionSession::IL_emit");
#endif
- return std::move(CompletedQueries);
+ return std::move(EQ);
}
Error ExecutionSession::OL_notifyEmitted(
@@ -3471,40 +2942,127 @@ Error ExecutionSession::OL_notifyEmitted(
}
#endif // NDEBUG
- auto EDUInfos = simplifyDepGroups(MR, DepGroups);
+ std::vector<std::unique_ptr<WaitingOnGraph::SuperNode>> SNs;
+ WaitingOnGraph::ContainerElementsMap Residual;
+ {
+ auto &JDResidual = Residual[&MR.getTargetJITDylib()];
+ for (auto &[Name, Flags] : MR.getSymbols())
+ JDResidual.insert(NonOwningSymbolStringPtr(Name));
+
+ for (auto &SDG : DepGroups) {
+ WaitingOnGraph::ContainerElementsMap Defs;
+ assert(!SDG.Symbols.empty());
+ auto &JDDefs = Defs[&MR.getTargetJITDylib()];
+ for (auto &Def : SDG.Symbols) {
+ JDDefs.insert(NonOwningSymbolStringPtr(Def));
+ JDResidual.erase(NonOwningSymbolStringPtr(Def));
+ }
+ WaitingOnGraph::ContainerElementsMap Deps;
+ if (!SDG.Dependencies.empty()) {
+ for (auto &[JD, Syms] : SDG.Dependencies) {
+ auto &JDDeps = Deps[JD];
+ for (auto &Dep : Syms)
+ JDDeps.insert(NonOwningSymbolStringPtr(Dep));
+ }
+ }
+ SNs.push_back(std::make_unique<WaitingOnGraph::SuperNode>(
+ std::move(Defs), std::move(Deps)));
+ }
+ if (!JDResidual.empty())
+ SNs.push_back(std::make_unique<WaitingOnGraph::SuperNode>(
+ std::move(Residual), WaitingOnGraph::ContainerElementsMap()));
+ }
+
+ auto SR = WaitingOnGraph::simplify(std::move(SNs));
LLVM_DEBUG({
dbgs() << " Simplified dependencies:\n";
- for (auto &[EDU, EDUInfo] : EDUInfos) {
- dbgs() << " Symbols: { ";
- for (auto &[Sym, Flags] : EDU->Symbols)
- dbgs() << Sym << " ";
- dbgs() << "}, Dependencies: { ";
- for (auto &[DepJD, Deps] : EDU->Dependencies) {
- dbgs() << "(" << DepJD->getName() << ", { ";
- for (auto &Dep : Deps)
- dbgs() << Dep << " ";
- dbgs() << "}) ";
+ for (auto &SN : SR.superNodes()) {
+
+ auto SortedLibs = [](WaitingOnGraph::ContainerElementsMap &C) {
+ std::vector<JITDylib *> JDs;
+ for (auto &[JD, _] : C)
+ JDs.push_back(JD);
+ llvm::sort(JDs, [](const JITDylib *LHS, const JITDylib *RHS) {
+ return LHS->getName() < RHS->getName();
+ });
+ return JDs;
+ };
+
+ auto SortedNames = [](WaitingOnGraph::ElementSet &Elems) {
+ std::vector<NonOwningSymbolStringPtr> Names(Elems.begin(), Elems.end());
+ llvm::sort(Names, [](const NonOwningSymbolStringPtr &LHS,
+ const NonOwningSymbolStringPtr &RHS) {
+ return *LHS < *RHS;
+ });
+ return Names;
+ };
+
+ dbgs() << " Defs: {";
+ for (auto *JD : SortedLibs(SN->defs())) {
+ dbgs() << " (" << JD->getName() << ", [";
+ for (auto &Sym : SortedNames(SN->defs()[JD]))
+ dbgs() << " " << Sym;
+ dbgs() << " ])";
+ }
+ dbgs() << " }, Deps: {";
+ for (auto *JD : SortedLibs(SN->deps())) {
+ dbgs() << " (" << JD->getName() << ", [";
+ for (auto &Sym : SortedNames(SN->deps()[JD]))
+ dbgs() << " " << Sym;
+ dbgs() << " ])";
}
- dbgs() << "}\n";
+ dbgs() << " }\n";
}
});
-
- auto CompletedQueries =
- runSessionLocked([&]() { return IL_emit(MR, EDUInfos); });
+ auto EmitQueries =
+ runSessionLocked([&]() { return IL_emit(MR, std::move(SR)); });
// On error bail out.
- if (!CompletedQueries)
- return CompletedQueries.takeError();
+ if (!EmitQueries)
+ return EmitQueries.takeError();
- MR.SymbolFlags.clear();
+ // Otherwise notify failed queries, and any updated queries that have been
+ // completed.
- // Otherwise notify all the completed queries.
- for (auto &Q : *CompletedQueries) {
- assert(Q->isComplete() && "Q is not complete");
- Q->handleComplete(*this);
+ // FIXME: Get rid of error return from notifyEmitted.
+ SymbolDependenceMap BadDeps;
+ {
+ for (auto &FQ : EmitQueries->Failed) {
+ FQ->detach();
+ assert(EmitQueries->FailedSymsForQuery.count(FQ.get()) &&
+ "Missing failed symbols for query");
+ auto FailedSyms = std::move(EmitQueries->FailedSymsForQuery[FQ.get()]);
+ for (auto &[JD, Syms] : *FailedSyms) {
+ auto &BadDepsForJD = BadDeps[JD];
+ for (auto &Sym : Syms)
+ BadDepsForJD.insert(Sym);
+ }
+ FQ->handleFailed(make_error<FailedToMaterialize>(getSymbolStringPool(),
+ std::move(FailedSyms)));
+ }
+ }
+
+ for (auto &UQ : EmitQueries->Updated)
+ if (UQ->isComplete())
+ UQ->handleComplete(*this);
+
+ // If there are any bad dependencies then return an error.
+ if (!BadDeps.empty()) {
+ SymbolNameSet BadNames;
+ // Note: The name set calculated here is bogus: it includes all symbols in
+ // the MR, not just the ones that failed. We want to remove the error
+ // return path from notifyEmitted anyway, so this is just a brief
+ // placeholder to maintain (roughly) the current error behavior.
+ for (auto &[Name, Flags] : MR.getSymbols())
+ BadNames.insert(Name);
+ MR.SymbolFlags.clear();
+ return make_error<UnsatisfiedSymbolDependencies>(
+ getSymbolStringPool(), &MR.getTargetJITDylib(), std::move(BadNames),
+ std::move(BadDeps), "dependencies removed or in error state");
}
+ MR.SymbolFlags.clear();
return Error::success();
}
@@ -3535,158 +3093,48 @@ ExecutionSession::IL_failSymbols(JITDylib &JD,
#endif
JITDylib::AsynchronousSymbolQuerySet FailedQueries;
- auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- auto ExtractFailedQueries = [&](JITDylib::MaterializingInfo &MI) {
- JITDylib::AsynchronousSymbolQueryList ToDetach;
- for (auto &Q : MI.pendingQueries()) {
- // Add the query to the list to be failed and detach it.
- FailedQueries.insert(Q);
- ToDetach.push_back(Q);
+ auto Fail = [&](JITDylib *FailJD, NonOwningSymbolStringPtr FailSym) {
+ auto I = FailJD->Symbols.find_as(FailSym);
+ assert(I != FailJD->Symbols.end());
+ I->second.setFlags(I->second.getFlags() | JITSymbolFlags::HasError);
+ auto J = FailJD->MaterializingInfos.find_as(FailSym);
+ if (J != FailJD->MaterializingInfos.end()) {
+ for (auto &Q : J->second.takeAllPendingQueries())
+ FailedQueries.insert(std::move(Q));
+ FailJD->MaterializingInfos.erase(J);
}
- for (auto &Q : ToDetach)
- Q->detach();
- assert(!MI.hasQueriesPending() && "Queries still pending after detach");
};
- for (auto &Name : SymbolsToFail) {
- (*FailedSymbolsMap)[&JD].insert(Name);
-
- // Look up the symbol to fail.
- auto SymI = JD.Symbols.find(Name);
-
- // FIXME: Revisit this. We should be able to assert sequencing between
- // ResourceTracker removal and symbol failure.
- //
- // It's possible that this symbol has already been removed, e.g. if a
- // materialization failure happens concurrently with a ResourceTracker or
- // JITDylib removal. In that case we can safely skip this symbol and
- // continue.
- if (SymI == JD.Symbols.end())
- continue;
- auto &Sym = SymI->second;
-
- // If the symbol is already in the error state then we must have visited
- // it earlier.
- if (Sym.getFlags().hasError()) {
- assert(!JD.MaterializingInfos.count(Name) &&
- "Symbol in error state still has MaterializingInfo");
- continue;
- }
+ auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
- // Move the symbol into the error state.
- Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError);
-
- // FIXME: Come up with a sane mapping of state to
- // presence-of-MaterializingInfo so that we can assert presence / absence
- // here, rather than testing it.
- auto MII = JD.MaterializingInfos.find(Name);
- if (MII == JD.MaterializingInfos.end())
- continue;
-
- auto &MI = MII->second;
-
- // Collect queries to be failed for this MII.
- ExtractFailedQueries(MI);
-
- if (MI.DefiningEDU) {
- // If there is a DefiningEDU for this symbol then remove this
- // symbol from it.
- assert(MI.DependantEDUs.empty() &&
- "Symbol with DefiningEDU should not have DependantEDUs");
- assert(Sym.getState() >= SymbolState::Emitted &&
- "Symbol has EDU, should have been emitted");
- assert(MI.DefiningEDU->Symbols.count(NonOwningSymbolStringPtr(Name)) &&
- "Symbol does not appear in its DefiningEDU");
- MI.DefiningEDU->Symbols.erase(NonOwningSymbolStringPtr(Name));
-
- // Remove this EDU from the dependants lists of its dependencies.
- for (auto &[DepJD, DepSyms] : MI.DefiningEDU->Dependencies) {
- for (auto DepSym : DepSyms) {
- assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) &&
- "DepSym not in DepJD");
- assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) &&
- "DepSym has not MaterializingInfo");
- auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)];
- assert(SymMI.DependantEDUs.count(MI.DefiningEDU.get()) &&
- "DefiningEDU missing from DependantEDUs list of dependency");
- SymMI.DependantEDUs.erase(MI.DefiningEDU.get());
- }
- }
+ {
+ auto &FailedSymsForJD = (*FailedSymbolsMap)[&JD];
+ for (auto &Sym : SymbolsToFail) {
+ FailedSymsForJD.insert(Sym);
+ Fail(&JD, NonOwningSymbolStringPtr(Sym));
+ }
+ }
- MI.DefiningEDU = nullptr;
- } else {
- // Otherwise if there are any EDUs waiting on this symbol then move
- // those symbols to the error state too, and deregister them from the
- // symbols that they depend on.
- // Note: We use a copy of DependantEDUs here since we'll be removing
- // from the original set as we go.
- for (auto &DependantEDU : MI.DependantEDUs) {
-
- // Remove DependantEDU from all of its users DependantEDUs lists.
- for (auto &[DepJD, DepSyms] : DependantEDU->Dependencies) {
- for (auto DepSym : DepSyms) {
- // Skip self-reference to avoid invalidating the MI.DependantEDUs
- // map. We'll clear this later.
- if (DepJD == &JD && DepSym == Name)
- continue;
- assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) &&
- "DepSym not in DepJD?");
- assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) &&
- "DependantEDU not registered with symbol it depends on");
- auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)];
- assert(SymMI.DependantEDUs.count(DependantEDU) &&
- "DependantEDU missing from DependantEDUs list");
- SymMI.DependantEDUs.erase(DependantEDU);
- }
- }
+ WaitingOnGraph::ContainerElementsMap ToFail;
+ auto &JDToFail = ToFail[&JD];
+ for (auto &Sym : SymbolsToFail)
+ JDToFail.insert(NonOwningSymbolStringPtr(Sym));
- // Move any symbols defined by DependantEDU into the error state and
- // fail any queries waiting on them.
- auto &DepJD = *DependantEDU->JD;
- auto DepEDUSymbols = std::move(DependantEDU->Symbols);
- for (auto &[DepName, Flags] : DepEDUSymbols) {
- auto DepSymItr = DepJD.Symbols.find(SymbolStringPtr(DepName));
- assert(DepSymItr != DepJD.Symbols.end() &&
- "Symbol not present in table");
- auto &DepSym = DepSymItr->second;
-
- assert(DepSym.getState() >= SymbolState::Emitted &&
- "Symbol has EDU, should have been emitted");
- assert(!DepSym.getFlags().hasError() &&
- "Symbol is already in the error state?");
- DepSym.setFlags(DepSym.getFlags() | JITSymbolFlags::HasError);
- (*FailedSymbolsMap)[&DepJD].insert(SymbolStringPtr(DepName));
-
- // This symbol has a defining EDU so its MaterializingInfo object must
- // exist.
- auto DepMIItr =
- DepJD.MaterializingInfos.find(SymbolStringPtr(DepName));
- assert(DepMIItr != DepJD.MaterializingInfos.end() &&
- "Symbol has defining EDU but not MaterializingInfo");
- auto &DepMI = DepMIItr->second;
- assert(DepMI.DefiningEDU.get() == DependantEDU &&
- "Bad EDU dependence edge");
- assert(DepMI.DependantEDUs.empty() &&
- "Symbol was emitted, should not have any DependantEDUs");
- ExtractFailedQueries(DepMI);
- DepJD.MaterializingInfos.erase(SymbolStringPtr(DepName));
- }
+ auto FailedSNs = G.fail(ToFail);
- DepJD.shrinkMaterializationInfoMemory();
+ for (auto &SN : FailedSNs) {
+ for (auto &[FailJD, Defs] : SN->defs()) {
+ auto &FailedSymsForFailJD = (*FailedSymbolsMap)[FailJD];
+ for (auto &Def : Defs) {
+ FailedSymsForFailJD.insert(SymbolStringPtr(Def));
+ Fail(FailJD, Def);
}
-
- MI.DependantEDUs.clear();
}
-
- assert(!MI.DefiningEDU && "DefiningEDU should have been reset");
- assert(MI.DependantEDUs.empty() &&
- "DependantEDUs should have been removed above");
- assert(!MI.hasQueriesPending() &&
- "Can not delete MaterializingInfo with queries pending");
- JD.MaterializingInfos.erase(Name);
}
- JD.shrinkMaterializationInfoMemory();
+ // Detach all failed queries.
+ for (auto &Q : FailedQueries)
+ Q->detach();
#ifdef EXPENSIVE_CHECKS
verifySessionState("exiting ExecutionSession::IL_failSymbols");
@@ -3721,9 +3169,11 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
return IL_failSymbols(MR.getTargetJITDylib(), SymbolsToFail);
});
- for (auto &Q : FailedQueries)
+ for (auto &Q : FailedQueries) {
+ Q->detach();
Q->handleFailed(
make_error<FailedToMaterialize>(getSymbolStringPool(), FailedSymbols));
+ }
}
Error ExecutionSession::OL_replace(MaterializationResponsibility &MR,
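For orientation, a minimal sketch of how OL_notifyEmitted now feeds the WaitingOnGraph (type and function names are from this patch; the JITDylibs and symbol names are invented): one group defining foo in JD1 that depends on bar in JD2, plus a residual definition baz with no dependencies.

  WaitingOnGraph::ContainerElementsMap FooDefs, FooDeps, BazDefs;
  FooDefs[&JD1].insert(NonOwningSymbolStringPtr(FooName));
  FooDeps[&JD2].insert(NonOwningSymbolStringPtr(BarName));
  BazDefs[&JD1].insert(NonOwningSymbolStringPtr(BazName));
  std::vector<std::unique_ptr<WaitingOnGraph::SuperNode>> SNs;
  SNs.push_back(std::make_unique<WaitingOnGraph::SuperNode>(
      std::move(FooDefs), std::move(FooDeps)));
  SNs.push_back(std::make_unique<WaitingOnGraph::SuperNode>(
      std::move(BazDefs), WaitingOnGraph::ContainerElementsMap()));
  auto SR = WaitingOnGraph::simplify(std::move(SNs));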
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index dec1df7..893523c 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -448,7 +448,7 @@ Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) {
if (const char *ErrMsg = WFR.getOutOfBandError())
return make_error<StringError>(ErrMsg, inconvertibleErrorCode());
- detail::SPSSerializableError Info;
+ orc::shared::detail::SPSSerializableError Info;
SPSInputBuffer IB(WFR.data(), WFR.size());
if (!SPSArgList<SPSError>::deserialize(IB, Info))
return make_error<StringError>("Could not deserialize hangup info",
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 3908a78..488b078 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -3196,7 +3196,7 @@ void AssemblyWriter::printModuleSummaryIndex() {
// for aliasee (then update BitcodeWriter.cpp and remove get/setAliaseeGUID).
for (auto &GlobalList : *TheIndex) {
auto GUID = GlobalList.first;
- for (auto &Summary : GlobalList.second.SummaryList)
+ for (auto &Summary : GlobalList.second.getSummaryList())
SummaryToGUIDMap[Summary.get()] = GUID;
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 10f915d..7e5e7b5 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -6045,6 +6045,120 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
}
}
+// Set the function attribute with the given value if it is not already
+// present.
+static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
+ StringRef Value) {
+ if (!F.hasFnAttribute(FnAttrName))
+ F.addFnAttr(FnAttrName, Value);
+}
+
+// Add the function attribute if it is not present and Set is true.
+// If the attribute's value is "false" then remove it.
+// If the attribute's value is "true" then reset it to a valueless attribute.
+static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
+ if (!F.hasFnAttribute(FnAttrName)) {
+ if (Set)
+ F.addFnAttr(FnAttrName);
+ } else {
+ auto A = F.getFnAttribute(FnAttrName);
+ if ("false" == A.getValueAsString())
+ F.removeFnAttr(FnAttrName);
+ else if ("true" == A.getValueAsString()) {
+ F.removeFnAttr(FnAttrName);
+ F.addFnAttr(FnAttrName);
+ }
+ }
+}
+
+void llvm::copyModuleAttrToFunctions(Module &M) {
+ Triple T(M.getTargetTriple());
+ if (!T.isThumb() && !T.isARM() && !T.isAArch64())
+ return;
+
+ uint64_t BTEValue = 0;
+ uint64_t BPPLRValue = 0;
+ uint64_t GCSValue = 0;
+ uint64_t SRAValue = 0;
+ uint64_t SRAALLValue = 0;
+ uint64_t SRABKeyValue = 0;
+
+ NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
+ if (ModFlags) {
+ for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = ModFlags->getOperand(I);
+ if (Op->getNumOperands() != 3)
+ continue;
+
+ MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
+ auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
+ if (!ID || !CI)
+ continue;
+
+ StringRef IDStr = ID->getString();
+ uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
+ : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
+ : IDStr == "guarded-control-stack" ? &GCSValue
+ : IDStr == "sign-return-address" ? &SRAValue
+ : IDStr == "sign-return-address-all" ? &SRAALLValue
+ : IDStr == "sign-return-address-with-bkey"
+ ? &SRABKeyValue
+ : nullptr;
+ if (!ValPtr)
+ continue;
+
+ *ValPtr = CI->getZExtValue();
+ if (*ValPtr == 2)
+ return;
+ }
+ }
+
+ bool BTE = BTEValue == 1;
+ bool BPPLR = BPPLRValue == 1;
+ bool GCS = GCSValue == 1;
+ bool SRA = SRAValue == 1;
+
+ StringRef SignTypeValue = "non-leaf";
+ if (SRA && SRAALLValue == 1)
+ SignTypeValue = "all";
+
+ StringRef SignKeyValue = "a_key";
+ if (SRA && SRABKeyValue == 1)
+ SignKeyValue = "b_key";
+
+ for (Function &F : M.getFunctionList()) {
+ if (F.isDeclaration())
+ continue;
+
+ if (SRA) {
+ setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
+ setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
+ } else {
+ if (auto A = F.getFnAttribute("sign-return-address");
+ A.isValid() && "none" == A.getValueAsString()) {
+ F.removeFnAttr("sign-return-address");
+ F.removeFnAttr("sign-return-address-key");
+ }
+ }
+ ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
+ ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
+ ConvertFunctionAttr(F, GCS, "guarded-control-stack");
+ }
+
+ if (BTE)
+ M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
+ if (BPPLR)
+ M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
+ if (GCS)
+ M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
+ if (SRA) {
+ M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
+ if (SRAALLValue == 1)
+ M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
+ if (SRABKeyValue == 1)
+ M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
+ }
+}
+
static bool isOldLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
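A worked example of the conversion (module-flag values invented): given module flags

  branch-target-enforcement = 1
  sign-return-address = 1
  sign-return-address-all = 1

copyModuleAttrToFunctions gives every defined function the attributes

  "branch-target-enforcement" "sign-return-address"="all"
  "sign-return-address-key"="a_key"

and then re-emits the flags with Min semantics and value 2, which marks the module as already converted (the early return on a flag value of 2 skips modules processed this way before).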
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 9060a89..3b8fde8 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2878,7 +2878,7 @@ unsigned CastInst::isEliminableCastPair(Instruction::CastOps firstOp,
{ 99,99,99, 0, 0,99,99, 0, 0,99,99,99, 4, 0}, // FPTrunc |
{ 99,99,99, 2, 2,99,99, 8, 2,99,99,99, 4, 0}, // FPExt |
{ 1, 0, 0,99,99, 0, 0,99,99,99,99, 7, 3, 0}, // PtrToInt |
- { 1, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // PtrToAddr |
+ { 0, 0, 0,99,99, 0, 0,99,99,99,99, 0, 3, 0}, // PtrToAddr |
{ 99,99,99,99,99,99,99,99,99,11,11,99,15, 0}, // IntToPtr |
{ 5, 5, 5, 0, 0, 5, 5, 0, 0,16,16, 5, 1,14}, // BitCast |
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index dc55b63..a6353664 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -162,7 +162,7 @@ void ModuleSummaryIndex::collectDefinedFunctionsForModule(
StringRef ModulePath, GVSummaryMapTy &GVSummaryMap) const {
for (auto &GlobalList : *this) {
auto GUID = GlobalList.first;
- for (auto &GlobSummary : GlobalList.second.SummaryList) {
+ for (auto &GlobSummary : GlobalList.second.getSummaryList()) {
auto *Summary = dyn_cast_or_null<FunctionSummary>(GlobSummary.get());
if (!Summary)
// Ignore global variable, focus on functions
@@ -263,7 +263,7 @@ void ModuleSummaryIndex::propagateAttributes(
DenseSet<ValueInfo> MarkedNonReadWriteOnly;
for (auto &P : *this) {
bool IsDSOLocal = true;
- for (auto &S : P.second.SummaryList) {
+ for (auto &S : P.second.getSummaryList()) {
if (!isGlobalValueLive(S.get())) {
// computeDeadSymbolsAndUpdateIndirectCalls should have marked all
// copies live. Note that it is possible that there is a GUID collision
@@ -273,7 +273,7 @@ void ModuleSummaryIndex::propagateAttributes(
// all copies live we can assert here that all are dead if any copy is
// dead.
assert(llvm::none_of(
- P.second.SummaryList,
+ P.second.getSummaryList(),
[&](const std::unique_ptr<GlobalValueSummary> &Summary) {
return isGlobalValueLive(Summary.get());
}));
@@ -308,16 +308,16 @@ void ModuleSummaryIndex::propagateAttributes(
// Mark the flag in all summaries false so that we can do quick check
// without going through the whole list.
for (const std::unique_ptr<GlobalValueSummary> &Summary :
- P.second.SummaryList)
+ P.second.getSummaryList())
Summary->setDSOLocal(false);
}
setWithAttributePropagation();
setWithDSOLocalPropagation();
if (llvm::AreStatisticsEnabled())
for (auto &P : *this)
- if (P.second.SummaryList.size())
+ if (P.second.getSummaryList().size())
if (auto *GVS = dyn_cast<GlobalVarSummary>(
- P.second.SummaryList[0]->getBaseObject()))
+ P.second.getSummaryList()[0]->getBaseObject()))
if (isGlobalValueLive(GVS)) {
if (GVS->maybeReadOnly())
ReadOnlyLiveGVars++;
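The getSummaryList accessor itself is not part of this diff; based on the ThinLTOCodeGenerator signature change below, its presumed shape is (an assumption):

  // In GlobalValueSummaryInfo: read-only access to the summary list.
  ArrayRef<std::unique_ptr<GlobalValueSummary>> getSummaryList() const {
    return SummaryList;
  }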
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 7ea2e46..77af29b 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -21,9 +21,6 @@ using namespace RTLIB;
#define GET_SET_TARGET_RUNTIME_LIBCALL_SETS
#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
#include "llvm/IR/RuntimeLibcalls.inc"
-#undef GET_INIT_RUNTIME_LIBCALL_NAMES
-#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
-#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
/// Set default libcall names. If a target wants to opt-out of a libcall it
/// should be placed here.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index aec8891..cbc0b1d 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -457,7 +457,7 @@ void llvm::thinLTOResolvePrevailingInIndex(
// when needed.
DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
for (auto &I : Index)
- for (auto &S : I.second.SummaryList)
+ for (auto &S : I.second.getSummaryList())
if (auto AS = dyn_cast<AliasSummary>(S.get()))
GlobalInvolvedWithAlias.insert(&AS->getAliasee());
@@ -1182,7 +1182,7 @@ Error LTO::checkPartiallySplit() {
// Otherwise check if there are any recorded in the combined summary from the
// ThinLTO modules.
for (auto &P : ThinLTO.CombinedIndex) {
- for (auto &S : P.second.SummaryList) {
+ for (auto &S : P.second.getSummaryList()) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
if (!FS)
continue;
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 280c3d1..93118be 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -770,11 +770,11 @@ bool lto::initImportList(const Module &M,
// via a WriteIndexesThinBackend.
for (const auto &GlobalList : CombinedIndex) {
// Ignore entries for undefined references.
- if (GlobalList.second.SummaryList.empty())
+ if (GlobalList.second.getSummaryList().empty())
continue;
auto GUID = GlobalList.first;
- for (const auto &Summary : GlobalList.second.SummaryList) {
+ for (const auto &Summary : GlobalList.second.getSummaryList()) {
// Skip the summaries for the importing module. These are included to
// e.g. record required linkage changes.
if (Summary->modulePath() == M.getModuleIdentifier())
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 5b333cd..ff94c54 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -101,8 +101,8 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
}
-static const GlobalValueSummary *
-getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
+static const GlobalValueSummary *getFirstDefinitionForLinker(
+ ArrayRef<std::unique_ptr<GlobalValueSummary>> GVSummaryList) {
// If there is any strong definition anywhere, get it.
auto StrongDefForLinker = llvm::find_if(
GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
@@ -131,14 +131,15 @@ getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
static void computePrevailingCopies(
const ModuleSummaryIndex &Index,
DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) {
- auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
- return GVSummaryList.size() > 1;
- };
+ auto HasMultipleCopies =
+ [&](ArrayRef<std::unique_ptr<GlobalValueSummary>> GVSummaryList) {
+ return GVSummaryList.size() > 1;
+ };
for (auto &I : Index) {
- if (HasMultipleCopies(I.second.SummaryList))
+ if (HasMultipleCopies(I.second.getSummaryList()))
PrevailingCopy[I.first] =
- getFirstDefinitionForLinker(I.second.SummaryList);
+ getFirstDefinitionForLinker(I.second.getSummaryList());
}
}
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 1bff6cd..f78d9b0 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1512,6 +1512,11 @@ Error IRLinker::run() {
// Loop over all of the linked values to compute type mappings.
computeTypeMapping();
+ // Convert module-level attributes to function-level attributes: after
+ // modules are merged the module-level attributes may change, and would
+ // then affect functions differently than they were affected in their
+ // original module.
+ copyModuleAttrToFunctions(*SrcM);
+
std::reverse(Worklist.begin(), Worklist.end());
while (!Worklist.empty()) {
GlobalValue *GV = Worklist.back();
@@ -1677,6 +1682,11 @@ IRMover::IRMover(Module &M) : Composite(M) {
for (const auto *MD : StructTypes.getVisitedMetadata()) {
SharedMDs[MD].reset(const_cast<MDNode *>(MD));
}
+
+ // Convert module-level attributes to function-level attributes: after
+ // modules are merged the module-level attributes may change, and would
+ // then affect functions differently than they were affected in their
+ // original module.
+ copyModuleAttrToFunctions(M);
}
Error IRMover::move(std::unique_ptr<Module> Src,
diff --git a/llvm/lib/Support/AllocToken.cpp b/llvm/lib/Support/AllocToken.cpp
new file mode 100644
index 0000000..95ecda2
--- /dev/null
+++ b/llvm/lib/Support/AllocToken.cpp
@@ -0,0 +1,50 @@
+//===- AllocToken.cpp - Allocation Token Calculation ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of AllocToken modes and shared calculation of stateless token IDs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AllocToken.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SipHash.h"
+
+using namespace llvm;
+
+static uint64_t getStableHash(const AllocTokenMetadata &Metadata,
+ uint64_t MaxTokens) {
+ return getStableSipHash(Metadata.TypeName) % MaxTokens;
+}
+
+std::optional<uint64_t> llvm::getAllocToken(AllocTokenMode Mode,
+ const AllocTokenMetadata &Metadata,
+ uint64_t MaxTokens) {
+ assert(MaxTokens && "Must provide non-zero max tokens");
+
+ switch (Mode) {
+ case AllocTokenMode::Increment:
+ case AllocTokenMode::Random:
+ // Stateful modes cannot be implemented as a pure function.
+ return std::nullopt;
+
+ case AllocTokenMode::TypeHash:
+ return getStableHash(Metadata, MaxTokens);
+
+ case AllocTokenMode::TypeHashPointerSplit: {
+ if (MaxTokens == 1)
+ return 0;
+ const uint64_t HalfTokens = MaxTokens / 2;
+ uint64_t Hash = getStableHash(Metadata, HalfTokens);
+ if (Metadata.ContainsPointer)
+ Hash += HalfTokens;
+ return Hash;
+ }
+ }
+
+ llvm_unreachable("");
+}
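Example call (metadata values invented; field use mirrors getStableHash above): with MaxTokens = 1024, TypeHashPointerSplit reserves the upper half [512, 1024) for pointer-containing types.

  AllocTokenMetadata MD;
  MD.TypeName = "MyStruct";
  MD.ContainsPointer = true;
  std::optional<uint64_t> Token =
      getAllocToken(AllocTokenMode::TypeHashPointerSplit, MD, 1024);
  // *Token == getStableSipHash("MyStruct") % 512 + 512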
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 42b21b5..671a5fe 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -149,6 +149,7 @@ add_llvm_component_library(LLVMSupport
AArch64BuildAttributes.cpp
ARMAttributeParser.cpp
ARMWinEH.cpp
+ AllocToken.cpp
Allocator.cpp
AutoConvert.cpp
Base64.cpp
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 549c418..f74e52a 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -111,7 +111,7 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
}
-LLVM_ABI void SpecialCaseList::Matcher::preprocess(bool BySize) {
+void SpecialCaseList::Matcher::preprocess(bool BySize) {
return std::visit([&](auto &V) { return V.preprocess(BySize); }, M);
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 662d84b..a81de5c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27602,6 +27602,15 @@ static SDValue performPTestFirstCombine(SDNode *N,
static SDValue
performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ // If a DUP(Op0) already exists, reuse it for the scalar_to_vector.
+ if (DCI.isAfterLegalizeDAG()) {
+ if (SDNode *LN = DCI.DAG.getNodeIfExists(AArch64ISD::DUP, N->getVTList(),
+ N->getOperand(0)))
+ return SDValue(LN, 0);
+ }
+
// Let's do below transform.
//
// t34: v4i32 = AArch64ISD::UADDLV t2
@@ -27638,7 +27647,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
// Let's generate new sequence with AArch64ISD::NVCAST.
- SDLoc DL(N);
SDValue EXTRACT_SUBVEC =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, UADDLV,
DAG.getConstant(0, DL, MVT::i64));
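Illustrative DAG shapes (node ids invented): if the function already contains

  t5: v4i32 = AArch64ISD::DUP t2

then a later

  t9: v4i32 = scalar_to_vector t2

now resolves to t5 after legalization, avoiding a second scalar-to-vector move of the same value.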
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 479e345..e3370d3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5722,7 +5722,7 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
}
// Add additional cost for the extends that would need to be inserted.
- return Cost + 4;
+ return Cost + 2;
}
InstructionCost
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
new file mode 100644
index 0000000..30a1f05
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp
@@ -0,0 +1,73 @@
+//===--- AMDGPUBarrierLatency.cpp - AMDGPU Barrier Latency ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains a DAG scheduling mutation to add latency to
+/// barrier edges between ATOMIC_FENCE instructions and preceding
+/// memory accesses potentially affected by the fence.
+/// This encourages the scheduling of more instructions before
+/// ATOMIC_FENCE instructions. ATOMIC_FENCE instructions may
+/// introduce wait counting or indicate an impending S_BARRIER
+/// wait. Having more instructions in-flight across these
+/// constructs improves latency hiding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUBarrierLatency.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+using namespace llvm;
+
+namespace {
+
+class BarrierLatency : public ScheduleDAGMutation {
+public:
+ BarrierLatency() = default;
+ void apply(ScheduleDAGInstrs *DAG) override;
+};
+
+void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
+ constexpr unsigned SyntheticLatency = 2000;
+ for (SUnit &SU : DAG->SUnits) {
+ const MachineInstr *MI = SU.getInstr();
+ if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
+ continue;
+
+ // Update latency on barrier edges of ATOMIC_FENCE.
+ // We don't consider the scope of the fence or type of instruction
+ // involved in the barrier edge.
+ for (SDep &PredDep : SU.Preds) {
+ if (!PredDep.isBarrier())
+ continue;
+ SUnit *PredSU = PredDep.getSUnit();
+ MachineInstr *PredMI = PredSU->getInstr();
+ // Only consider memory loads.
+ if (!PredMI->mayLoad() || PredMI->mayStore())
+ continue;
+ SDep ForwardD = PredDep;
+ ForwardD.setSUnit(&SU);
+ for (SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep == ForwardD) {
+ SuccDep.setLatency(SuccDep.getLatency() + SyntheticLatency);
+ break;
+ }
+ }
+ PredDep.setLatency(PredDep.getLatency() + SyntheticLatency);
+ PredSU->setDepthDirty();
+ SU.setDepthDirty();
+ }
+ }
+}
+
+} // end namespace
+
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createAMDGPUBarrierLatencyDAGMutation() {
+ return std::make_unique<BarrierLatency>();
+}
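
For illustration (not part of the patch): the mutation updates the edge on both endpoints because the scheduling DAG stores each dependence twice, once in SU.Preds and mirrored in PredSU.Succs. A toy model of that invariant, with made-up Node/Edge types rather than the LLVM API:

#include <cassert>
#include <vector>

struct Edge { int Other; unsigned Latency; };
struct Node { std::vector<Edge> Preds, Succs; };

void addLatency(std::vector<Node> &G, int SU, int Pred, unsigned Extra) {
  for (Edge &E : G[SU].Preds)
    if (E.Other == Pred)
      E.Latency += Extra; // backward copy, held by the successor
  for (Edge &E : G[Pred].Succs)
    if (E.Other == SU)
      E.Latency += Extra; // mirrored forward copy, held by the predecessor
}

int main() {
  std::vector<Node> G(2);
  G[1].Preds.push_back({0, 1}); // node 1 depends on node 0
  G[0].Succs.push_back({1, 1}); // mirrored copy of the same edge
  addLatency(G, 1, 0, 2000);
  assert(G[1].Preds[0].Latency == G[0].Succs[0].Latency);
  return 0;
}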
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
new file mode 100644
index 0000000..c23f0b9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h
@@ -0,0 +1,21 @@
+//===- AMDGPUBarrierLatency.h - AMDGPU Barrier Latency ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUBARRIERLATENCY_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUBARRIERLATENCY_H
+
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include <memory>
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUBarrierLatencyDAGMutation();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUBARRIERLATENCY_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4958a20..996b55f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPUBarrierLatency.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUExportKernelRuntimeHandles.h"
@@ -639,6 +640,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
return DAG;
}
@@ -659,6 +661,7 @@ createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C) {
if (ST.shouldClusterStores())
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
return DAG;
}
@@ -1197,6 +1200,7 @@ GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
EnableVOPD)
DAG->addMutation(createVOPDPairingMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
+ DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
return DAG;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 13f727b68..a1e0e52 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAsmPrinter.cpp
AMDGPUAtomicOptimizer.cpp
AMDGPUAttributor.cpp
+ AMDGPUBarrierLatency.cpp
AMDGPUCallLowering.cpp
AMDGPUCodeGenPrepare.cpp
AMDGPUCombinerHelper.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 50447f4..2ff2d2f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4032,28 +4032,31 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
}
}
+/// Helper struct for the implementation of 3-address conversion to communicate
+/// updates made to instruction operands.
+struct SIInstrInfo::ThreeAddressUpdates {
+ /// Other instruction whose def is no longer used by the converted
+ /// instruction.
+ MachineInstr *RemoveMIUse = nullptr;
+};
+
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
MachineBasicBlock &MBB = *MI.getParent();
- unsigned Opc = MI.getOpcode();
+ ThreeAddressUpdates U;
+ MachineInstr *NewMI = convertToThreeAddressImpl(MI, U);
- // Handle MFMA.
- int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
- if (NewMFMAOpc != -1) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
- updateLiveVariables(LV, MI, *MIB);
+ if (NewMI) {
+ updateLiveVariables(LV, MI, *NewMI);
if (LIS) {
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
// SlotIndex of defs needs to be updated when converting to early-clobber
- MachineOperand &Def = MIB->getOperand(0);
+ MachineOperand &Def = NewMI->getOperand(0);
if (Def.isEarlyClobber() && Def.isReg() &&
LIS->hasInterval(Def.getReg())) {
- SlotIndex OldIndex = LIS->getInstructionIndex(*MIB).getRegSlot(false);
- SlotIndex NewIndex = LIS->getInstructionIndex(*MIB).getRegSlot(true);
+ SlotIndex OldIndex = LIS->getInstructionIndex(*NewMI).getRegSlot(false);
+ SlotIndex NewIndex = LIS->getInstructionIndex(*NewMI).getRegSlot(true);
auto &LI = LIS->getInterval(Def.getReg());
auto UpdateDefIndex = [&](LiveRange &LR) {
auto *S = LR.find(OldIndex);
@@ -4068,6 +4071,58 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
UpdateDefIndex(SR);
}
}
+ }
+
+ if (U.RemoveMIUse) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ // The only user is the instruction which will be killed.
+ Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
+
+ if (MRI.hasOneNonDBGUse(DefReg)) {
+ // We cannot just remove the DefMI here; the calling pass would crash.
+ U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ U.RemoveMIUse->getOperand(0).setIsDead(true);
+ for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
+ U.RemoveMIUse->removeOperand(I);
+ if (LV)
+ LV->getVarInfo(DefReg).AliveBlocks.clear();
+ }
+
+ if (LIS) {
+ LiveInterval &DefLI = LIS->getInterval(DefReg);
+
+ // We cannot delete the original instruction here, so hack out the use
+ // in the original instruction with a dummy register so we can use
+ // shrinkToUses to deal with any multi-use edge cases. Other targets do
+ // not have the complexity of deleting a use to consider here.
+ Register DummyReg = MRI.cloneVirtualRegister(DefReg);
+ for (MachineOperand &MIOp : MI.uses()) {
+ if (MIOp.isReg() && MIOp.getReg() == DefReg) {
+ MIOp.setIsUndef(true);
+ MIOp.setReg(DummyReg);
+ }
+ }
+
+ LIS->shrinkToUses(&DefLI);
+ }
+ }
+
+ return NewMI;
+}
+
+MachineInstr *
+SIInstrInfo::convertToThreeAddressImpl(MachineInstr &MI,
+ ThreeAddressUpdates &U) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ unsigned Opc = MI.getOpcode();
+
+ // Handle MFMA.
+ int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
+ if (NewMFMAOpc != -1) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ MIB.add(MI.getOperand(I));
return MIB;
}
@@ -4077,11 +4132,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.setMIFlags(MI.getFlags());
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
MIB->addOperand(MI.getOperand(I));
-
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-
return MIB;
}
@@ -4152,39 +4202,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&]() -> void {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- // The only user is the instruction which will be killed.
- Register DefReg = DefMI->getOperand(0).getReg();
-
- if (MRI.hasOneNonDBGUse(DefReg)) {
- // We cannot just remove the DefMI here, calling pass will crash.
- DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
- DefMI->getOperand(0).setIsDead(true);
- for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
- DefMI->removeOperand(I);
- if (LV)
- LV->getVarInfo(DefReg).AliveBlocks.clear();
- }
-
- if (LIS) {
- LiveInterval &DefLI = LIS->getInterval(DefReg);
-
- // We cannot delete the original instruction here, so hack out the use
- // in the original instruction with a dummy register so we can use
- // shrinkToUses to deal with any multi-use edge cases. Other targets do
- // not have the complexity of deleting a use to consider here.
- Register DummyReg = MRI.cloneVirtualRegister(DefReg);
- for (MachineOperand &MIOp : MI.uses()) {
- if (MIOp.isReg() && MIOp.getReg() == DefReg) {
- MIOp.setIsUndef(true);
- MIOp.setReg(DummyReg);
- }
- }
-
- LIS->shrinkToUses(&DefLI);
- }
- };
int64_t Imm;
if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
@@ -4196,10 +4213,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src1)
.addImm(Imm)
.setMIFlags(MI.getFlags());
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ U.RemoveMIUse = DefMI;
return MIB;
}
}
@@ -4212,11 +4226,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.add(*Src2)
.setMIFlags(MI.getFlags());
- updateLiveVariables(LV, MI, *MIB);
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ U.RemoveMIUse = DefMI;
return MIB;
}
}
@@ -4235,12 +4245,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.add(*Src2)
.setMIFlags(MI.getFlags());
- updateLiveVariables(LV, MI, *MIB);
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- if (DefMI)
- killDef();
+ U.RemoveMIUse = DefMI;
return MIB;
}
}
@@ -4269,9 +4274,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.setMIFlags(MI.getFlags());
if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
MIB.addImm(OpSel ? OpSel->getImm() : 0);
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index df27ec1..e1d7a07 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -88,6 +88,8 @@ private:
};
class SIInstrInfo final : public AMDGPUGenInstrInfo {
+ struct ThreeAddressUpdates;
+
private:
const SIRegisterInfo RI;
const GCNSubtarget &ST;
@@ -190,6 +192,9 @@ private:
bool resultDependsOnExec(const MachineInstr &MI) const;
+ MachineInstr *convertToThreeAddressImpl(MachineInstr &MI,
+ ThreeAddressUpdates &Updates) const;
+
protected:
/// If the specific machine instruction is a instruction that moves/copies
/// value from one register to another register return destination and source
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 74d4153..6f1feb1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2223,8 +2223,8 @@ def : GCNPat <
def : GCNPat <
(DivergentUnaryFrag<fneg> (v2f32 VReg_64:$src)),
- (V_PK_ADD_F32 11 /* OP_SEL_1 | NEG_LO | HEG_HI */, VReg_64:$src,
- 11 /* OP_SEL_1 | NEG_LO | HEG_HI */, (i64 0),
+ (V_PK_ADD_F32 !or(SRCMODS.OP_SEL_1, SRCMODS.NEG, SRCMODS.NEG_HI), VReg_64:$src,
+ !or(SRCMODS.OP_SEL_1, SRCMODS.NEG, SRCMODS.NEG_HI), (i64 0),
0, 0, 0, 0, 0)
> {
let SubtargetPredicate = HasPackedFP32Ops;
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index bdbc000..07264d9 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -397,12 +397,6 @@ public:
bool IsCrossAddrSpaceOrdering,
Position Pos) const = 0;
- /// Inserts any necessary instructions before the barrier start instruction
- /// \p MI in order to support pairing of barriers and fences.
- virtual bool insertBarrierStart(MachineBasicBlock::iterator &MI) const {
- return false;
- };
-
/// Virtual destructor to allow derivations to be deleted.
virtual ~SICacheControl() = default;
};
@@ -583,12 +577,8 @@ public:
bool IsCrossAddrSpaceOrdering, Position Pos,
AtomicOrdering Order, bool AtomicsOnly) const override;
- bool insertAcquire(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
-
- bool insertBarrierStart(MachineBasicBlock::iterator &MI) const override;
+ bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx11CacheControl : public SIGfx10CacheControl {
@@ -2069,8 +2059,11 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// the WGP. Therefore need to wait for operations to complete to ensure
// they are visible to waves in the other CU as the L0 is per CU.
// Otherwise in CU mode and all waves of a work-group are on the same CU
- // which shares the same L0.
- if (!ST.isCuModeEnabled()) {
+ // which shares the same L0. Note that we still need to wait when
+ // performing a release in this mode to respect the transitivity of
+ // happens-before, e.g. other waves of the workgroup must be able to
+ // release the memory from another wave at a wider scope.
+ if (!ST.isCuModeEnabled() || isReleaseOrStronger(Order)) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
VMCnt |= true;
if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
@@ -2225,22 +2218,6 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx10CacheControl::insertBarrierStart(
- MachineBasicBlock::iterator &MI) const {
- // We need to wait on vm_vsrc so barriers can pair with fences in GFX10+ CU
- // mode. This is because a CU mode release fence does not emit any wait, which
- // is fine when only dealing with vmem, but isn't sufficient in the presence
- // of barriers which do not go through vmem.
- // GFX12.5 does not require this additional wait.
- if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts())
- return false;
-
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
- return true;
-}
-
bool SIGfx11CacheControl::enableLoadCacheBypass(
const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const {
@@ -2419,15 +2396,20 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// In WGP mode the waves of a work-group can be executing on either CU
// of the WGP. Therefore need to wait for operations to complete to
// ensure they are visible to waves in the other CU as the L0 is per CU.
+ //
// Otherwise in CU mode and all waves of a work-group are on the same CU
- // which shares the same L0.
+ // which shares the same L0. Note that we still need to wait when
+ // performing a release in this mode to respect the transitivity of
+ // happens-before, e.g. other waves of the workgroup must be able to
+ // release the memory from another wave at a wider scope.
//
// GFX12.5:
// CU$ has two ports. To ensure operations are visible at the workgroup
// level, we need to ensure all operations in this port have completed
// so the other SIMDs in the WG can see them. There is no ordering
// guarantee between the ports.
- if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
+ if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts() ||
+ isReleaseOrStronger(Order)) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
LOADCnt |= true;
if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
@@ -3017,11 +2999,6 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) {
MI = II->getIterator();
}
- if (ST.getInstrInfo()->isBarrierStart(MI->getOpcode())) {
- Changed |= CC->insertBarrierStart(MI);
- continue;
- }
-
if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
continue;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42ec8ba..7cce033 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
- defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
}
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 1f773e2..3368a50 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -820,7 +820,7 @@ void ARMAsmPrinter::emitAttributes() {
auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("branch-target-enforcement"));
- if (BTIValue && BTIValue->isOne()) {
+ if (BTIValue && !BTIValue->isZero()) {
// If "+pacbti" is used as an architecture extension,
// Tag_BTI_extension is emitted in
// ARMTargetStreamer::emitTargetAttributes().
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 35e1127..b1a668e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1089,7 +1089,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
// Register based DivRem for AEABI (RTABI 4.2)
if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
- TT.isTargetMuslAEABI() || TT.isOSWindows()) {
+ TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
+ setOperationAction(ISD::LROUND, MVT::f16, Expand);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
@@ -20574,7 +20575,7 @@ static TargetLowering::ArgListTy getDivRemArgList(
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
- Subtarget->isTargetWindows()) &&
+ Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 96ee69c..597d311 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -882,7 +882,7 @@ static bool producesFalseLanesZero(MachineInstr &MI,
continue;
// Skip the lr predicate reg
int PIdx = llvm::findFirstVPTPredOperandIdx(MI);
- if (PIdx != -1 && (int)MO.getOperandNo() == PIdx + 2)
+ if (PIdx != -1 && MO.getOperandNo() == PIdx + ARM::SUBOP_vpred_n_tp_reg)
continue;
// Check that this instruction will produce zeros in its false lanes:
@@ -1036,6 +1036,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
while (!Worklist.empty()) {
MachineInstr *MI = Worklist.pop_back_val();
if (MI->getOpcode() == ARM::MQPRCopy) {
+ LLVM_DEBUG(dbgs() << " Must generate copy as VMOV: " << *MI);
VMOVCopies.insert(MI);
MachineInstr *CopySrc =
RDI.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg());
@@ -1045,6 +1046,20 @@ bool LowOverheadLoop::ValidateLiveOuts() {
LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);
VMOVCopies.clear();
return false;
+ } else if (isVectorPredicated(MI)) {
+ // If this is a predicated instruction with merging semantics,
+ // check where it gets its false lanes from, if any.
+ int InactiveIdx = findVPTInactiveOperandIdx(*MI);
+ if (InactiveIdx != -1) {
+ SmallPtrSet<MachineInstr *, 2> Defs;
+ MachineInstr *FalseSrc = RDI.getUniqueReachingMIDef(
+ MI, MI->getOperand(InactiveIdx).getReg());
+ if (FalseSrc) {
+ LLVM_DEBUG(dbgs()
+ << " Must check source of false lanes for: " << *MI);
+ Worklist.push_back(FalseSrc);
+ }
+ }
}
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index b2d368e..4a0883c 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -343,6 +343,7 @@ public:
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 5eeb4fe..413e844 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -534,7 +534,7 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
Register LR = LoopPhi->getOperand(0).getReg();
for (MachineInstr *MI : MVEInstrs) {
int Idx = findFirstVPTPredOperandIdx(*MI);
- MI->getOperand(Idx + 2).setReg(LR);
+ MI->getOperand(Idx + ARM::SUBOP_vpred_n_tp_reg).setReg(LR);
}
}
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 431ce38..f5653d4 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -805,6 +805,16 @@ int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
return -1;
}
+int llvm::findVPTInactiveOperandIdx(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.operands()[i].OperandType == ARM::OPERAND_VPRED_R)
+ return i + ARM::SUBOP_vpred_r_inactive;
+
+ return -1;
+}
+
ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
Register &PredReg) {
int PIdx = findFirstVPTPredOperandIdx(MI);
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 3ec3a621..1b0bf2d 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -90,6 +90,9 @@ inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) {
Register PredReg;
return getVPTInstrPredicate(MI, PredReg);
}
+// Identify the input operand in an MVE predicated instruction which
+// contributes the values of any inactive vector lanes.
+int findVPTInactiveOperandIdx(const MachineInstr &MI);
// Recomputes the Block Mask of Instr, a VPT or VPST instruction.
// This rebuilds the block mask of the instruction depending on the predicates
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index c8866bf..42e90f0 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -294,6 +294,14 @@ public:
if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
RootSignature->eraseFromParent();
+ // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
+ // causes all tests using the DXIL Validator to fail.
+ //
+ // This is a temporary fix and should be replaced with a whitelist once
+ // we have determined all metadata that the DXIL Validator allows.
+ if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
+ ErrNo->eraseFromParent();
+
return true;
}
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index d758260..1a5f096 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
HexagonOptAddrMode.cpp
HexagonOptimizeSZextends.cpp
HexagonPeephole.cpp
+ HexagonQFPOptimizer.cpp
HexagonRDFOpt.cpp
HexagonRegisterInfo.cpp
HexagonSelectionDAGInfo.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 109aba5..422ab20 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
void initializeHexagonVectorPrintPass(PassRegistry &);
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
Pass *createHexagonLoopIdiomPass();
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
FunctionPass *createHexagonVectorPrint();
FunctionPass *createHexagonVExtract();
FunctionPass *createHexagonExpandCondsets();
+FunctionPass *createHexagonQFPOptimizer();
} // end namespace llvm;
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 4ddbe7a..ff876f6 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -920,6 +920,10 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
// successors have been processed.
RegisterSet BlockDefs, InsDefs;
for (MachineInstr &MI : *B) {
+ // Stop if the map size is too large.
+ if (IFMap.size() >= MaxIFMSize)
+ break;
+
InsDefs.clear();
getInstrDefs(&MI, InsDefs);
// Leave those alone. They are more transparent than "insert".
@@ -942,8 +946,8 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
findRecordInsertForms(VR, AVs);
// Stop if the map size is too large.
- if (IFMap.size() > MaxIFMSize)
- return;
+ if (IFMap.size() >= MaxIFMSize)
+ break;
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index a94e131..54c8972 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget.useHVX128BOps())
+ if (Subtarget.useHVX128BOps()) {
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
+ }
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
@@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
// Handle bitcast from i32, v2i16, and v4i8 to v32i1.
// Splat the input into a 32-element i32 vector, then AND each element
// with a unique bitmask to isolate individual bits.
- if (ResTy == MVT::v32i1 &&
- (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
- Subtarget.useHVX128BOps()) {
- SDValue Val32 = Val;
- if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
- Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
-
+ auto bitcastI32ToV32I1 = [&](SDValue Val32) {
+ assert(Val32.getValueType().getSizeInBits() == 32 &&
+ "Input must be 32 bits");
MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
SmallVector<SDValue, 32> Mask;
@@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
- return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
+ return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
+ };
+ // === Case: v32i1 ===
+ if (ResTy == MVT::v32i1 &&
+ (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
+ Subtarget.useHVX128BOps()) {
+ SDValue Val32 = Val;
+ if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
+ Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
+ return bitcastI32ToV32I1(Val32);
+ }
+ // === Case: v64i1 ===
+ if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
+ // Split i64 into lo/hi 32-bit halves.
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
+ SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
+
+ // Reuse the same 32-bit logic twice.
+ SDValue LoRes = bitcastI32ToV32I1(Lo);
+ SDValue HiRes = bitcastI32ToV32I1(Hi);
+
+ // Concatenate into a v64i1 predicate.
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
}
if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
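
For illustration (not part of the patch): a scalar model of the v64i1 lowering above. Each 32-bit half is expanded bit-per-lane (standing in for the splat-and-mask step) and the two halves are concatenated; names are hypothetical.

#include <array>
#include <cstdint>

// Isolate each bit of a 32-bit value into its own boolean lane.
std::array<bool, 32> bitsToLanes(uint32_t V) {
  std::array<bool, 32> Lanes{};
  for (unsigned I = 0; I < 32; ++I)
    Lanes[I] = (V & (1u << I)) != 0; // lane I receives bit I
  return Lanes;
}

// i64 case: split into lo/hi halves, reuse the 32-bit logic, concatenate.
std::array<bool, 64> bitsToLanes64(uint64_t V) {
  auto Lo = bitsToLanes(static_cast<uint32_t>(V));
  auto Hi = bitsToLanes(static_cast<uint32_t>(V >> 32));
  std::array<bool, 64> Out{};
  for (unsigned I = 0; I < 32; ++I) {
    Out[I] = Lo[I];
    Out[32 + I] = Hi[I]; // high half occupies the upper 32 lanes
  }
  return Out;
}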
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
new file mode 100644
index 0000000..479ac90
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -0,0 +1,334 @@
+//===- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP optimizer --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic infrastructure for optimizing intermediate conversion instructions
+// generated while performing vector floating point operations.
+// It runs early in code generation for Hexagon, cleaning up redundant
+// conversion instructions and replacing each use of a conversion with the
+// appropriate machine operand. Liveness is preserved by this pass.
+//
+// @note: This pass does not itself eliminate the redundant conversion
+// instructions. It only replaces the uses of a conversion instruction with
+// the appropriate QFP instruction, leaving removal of the now-dead
+// conversions to the Dead Instruction Elimination pass.
+//
+// Brief overview of how this QFP optimizer works:
+// The pass iterates over each instruction and checks whether it belongs to
+// the Hexagon floating-point HVX arithmetic category (add, sub, mul). If it
+// does, the pass finds the unique definition of each of the instruction's
+// machine operands.
+//
+// Example:
+// MachineInstruction *MI be the HVX vadd instruction
+// MI -> $v0 = V6_vadd_sf $v1, $v2
+// MachineOperand *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+// MachineOperand *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+//
+// In the above example, DefMI1 and DefMI2 give the unique definitions
+// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
+//
+// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
+// V6_vconv_hf_qf16), the pass skips the current instruction and moves on to
+// the next one.
+//
+// If exactly one of the definitions is a conversion instruction, the pass
+// replaces the arithmetic instruction with its corresponding "mix" variant.
+// In the above example, if $v1 is defined by a conversion instruction:
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
+//
+// If both definitions are conversion instructions, the instruction is
+// replaced with its qf variant.
+// In the above example, if $v1 and $v2 are defined by conversion instructions:
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
+// with $v4)
+//
+// Currently, this pass does not handle the case where a definition is a
+// PHI instruction.
+//
+//===----------------------------------------------------------------------===//
+#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-qfp-optimizer"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ DisableQFOptimizer("disable-qfp-opt", cl::init(false),
+ cl::desc("Disable optimization of Qfloat operations."));
+
+namespace {
+const std::map<unsigned short, unsigned short> QFPInstMap{
+ {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
+ {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
+ {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
+ {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
+ {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
+ {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
+ {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
+ {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
+ {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
+ {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
+ {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
+ {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+} // namespace
+
+namespace llvm {
+
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
+} // namespace llvm
+
+namespace {
+
+struct HexagonQFPOptimizer : public MachineFunctionPass {
+public:
+ static char ID;
+
+ HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ const HexagonSubtarget *HST = nullptr;
+ const HexagonInstrInfo *HII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+};
+
+char HexagonQFPOptimizer::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
+ HEXAGON_QFP_OPTIMIZER, false, false)
+
+FunctionPass *llvm::createHexagonQFPOptimizer() {
+ return new HexagonQFPOptimizer();
+}
+
+bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+
+ // Early exit if the instruction has too few operands (QFP ops need two
+ // sources plus one destination) or has no transformation mapping.
+ if (MI->getNumOperands() < 3)
+ return false;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
+
+ unsigned Op0F = 0;
+ unsigned Op1F = 0;
+ // Get the reaching defs of MI's source operands, DefMI1 and DefMI2.
+ MachineInstr *DefMI1 = nullptr;
+ MachineInstr *DefMI2 = nullptr;
+
+ if (MI->getOperand(1).isReg())
+ DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (MI->getOperand(2).isReg())
+ DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+ if (!DefMI1 || !DefMI2)
+ return false;
+
+ MachineOperand &Res = MI->getOperand(0);
+ MachineInstr *Inst1 = nullptr;
+ MachineInstr *Inst2 = nullptr;
+ LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
+ DefMI2->dump());
+
+ // Get the reaching defs of DefMI1 and DefMI2.
+ if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() &&
+ DefMI1->getOperand(1).getReg().isVirtual())
+ Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg());
+
+ if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() &&
+ DefMI2->getOperand(1).getReg().isVirtual())
+ Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg());
+
+ unsigned Def1OP = DefMI1->getOpcode();
+ unsigned Def2OP = DefMI2->getOpcode();
+
+ MachineInstrBuilder MIB;
+ // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+ if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP == Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP == Hexagon::V6_vconv_hf_qf16)) {
+
+ // Bail out if a reaching def defines an HVX W (vector pair) register.
+ if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() &&
+ MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass) ||
+ (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() &&
+ MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass))
+ return false;
+
+ // Analyze the use operands of the conversion to get their KILL status
+ MachineOperand &Src1 = DefMI1->getOperand(1);
+ MachineOperand &Src2 = DefMI2->getOperand(1);
+
+ Op0F = getKillRegState(Src1.isKill());
+ Src1.setIsKill(false);
+
+ Op1F = getKillRegState(Src2.isKill());
+ Src2.setIsKill(false);
+
+ if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
+ // Take the mapping one step further: <op> maps to <op>_mix, which in
+ // turn maps to the pure qf variant needed when both operands are qf.
+ auto InnerIt = QFPInstMap.find(It->second);
+ if (InnerIt == QFPInstMap.end())
+ return false;
+ InstTy = InnerIt->second;
+ }
+
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+
+ // Case 2: Only the left operand is a conversion to sf/hf
+ } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP != Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
+ !DefMI2->isPHI() &&
+ (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
+
+ if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ MachineOperand &Src1 = DefMI1->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+
+ Op0F = getKillRegState(Src1.isKill());
+ Src1.setIsKill(false);
+ Op1F = getKillRegState(Src2.isKill());
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+
+ // Case 3: Only the right operand is a conversion to sf/hf
+ } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
+ Def2OP == Hexagon::V6_vconv_sf_qf32) ||
+ (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
+ Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
+ !DefMI1->isPHI() &&
+ (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
+ // The second operand of the original instruction is the converted one.
+ // In "mix" instructions, the "qf" operand is always the first operand.
+
+ // Caveat: vsub is not commutative w.r.t. its operands.
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix)
+ return false;
+
+ if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = DefMI2->getOperand(1);
+
+ Op1F = getKillRegState(Src2.isKill());
+ Src2.setIsKill(false);
+ Op0F = getKillRegState(Src1.isKill());
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+ }
+
+ return false;
+}
+
+bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changed = false;
+
+ if (DisableQFOptimizer)
+ return Changed;
+
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ if (!HST->useHVXV68Ops() || !HST->usePackets() ||
+ skipFunction(MF.getFunction()))
+ return false;
+ HII = HST->getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ MachineFunction::iterator MBBI = MF.begin();
+ LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimizer Pass for: " << MF.getName()
+ << " (optimize intermediate conversions) ===\n");
+ while (MBBI != MF.end()) {
+ MachineBasicBlock *MBB = &*MBBI;
+ MachineBasicBlock::iterator MII = MBBI->instr_begin();
+ while (MII != MBBI->instr_end()) {
+ MachineInstr *MI = &*MII;
+ ++MII; // As MI might be removed.
+
+ if (QFPInstMap.count(MI->getOpcode()) &&
+ MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
+ MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
+ }
+ }
+ ++MBBI;
+ }
+ return Changed;
+}
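
For illustration (not part of the patch): the inner loop above advances MII before calling optimizeQfp because the current instruction may be erased. A minimal standalone sketch of the same increment-before-erase idiom:

#include <list>

// Advance the iterator past the current element before any operation that
// may remove it, exactly as the pass does with "++MII".
void eraseEvens(std::list<int> &L) {
  for (auto It = L.begin(); It != L.end();) {
    auto Cur = It++; // step past Cur first; erasing Cur won't invalidate It
    if (*Cur % 2 == 0)
      L.erase(Cur);
  }
}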
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index f5d8b69..d9824a31 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
initializeHexagonPeepholePass(PR);
initializeHexagonSplitConst32AndConst64Pass(PR);
initializeHexagonVectorPrintPass(PR);
+ initializeHexagonQFPOptimizerPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonGenInsert());
if (EnableEarlyIf)
addPass(createHexagonEarlyIfConversion());
+ addPass(createHexagonQFPOptimizer());
}
return false;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8bf0d11..d477522 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -442,14 +442,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
- Subtarget.hasFRE()))
+ if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
- Subtarget.hasFRES()))
+ !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
@@ -565,16 +562,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::LRINT, MVT::f64, Legal);
- setOperationAction(ISD::LRINT, MVT::f32, Legal);
- setOperationAction(ISD::LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::LROUND, MVT::f64, Legal);
- setOperationAction(ISD::LROUND, MVT::f32, Legal);
- setOperationAction(ISD::LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::LLROUND, MVT::f32, Legal);
- }
+
+ setOperationAction(ISD::STRICT_LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Custom);
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1034,11 +1030,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
// The nearbyint variants are not allowed to raise the inexact exception
- // so we can only code-gen them with unsafe math.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- }
+ // so we can only code-gen them with fpexcept.ignore.
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -8911,11 +8905,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// be lost at this stage, but is below the single-precision rounding
// position.
//
- // However, if -enable-unsafe-fp-math is in effect, accept double
+ // However, if afn is in effect, accept double
// rounding to avoid the extra overhead.
- if (Op.getValueType() == MVT::f32 &&
- !Subtarget.hasFPCVT() &&
- !DAG.getTarget().Options.UnsafeFPMath) {
+ // FIXME: Currently INT_TO_FP can't support fast math flags because
+ // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
+ // false.
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
+ !Op->getFlags().hasApproximateFuncs()) {
// Twiddle input to make sure the low 11 bits are zero. (If this
// is the case, we are guaranteed the value will fit into the 53 bit
@@ -12759,6 +12755,14 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerADDSUBO_CARRY(Op, DAG);
case ISD::UCMP:
return LowerUCMP(Op, DAG);
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FNEARBYINT:
+ if (Op->getFlags().hasNoFPExcept())
+ return Op;
+ return SDValue();
}
}
@@ -13088,7 +13092,9 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ .addImm(PPC::PRED_NE_MINUS)
+ .addReg(PPC::CR0)
+ .addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
@@ -13346,7 +13352,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
.addReg(ZeroReg)
.addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE)
+ .addImm(PPC::PRED_NE_MINUS)
.addReg(PPC::CR0)
.addMBB(loopMBB);
BB->addSuccessor(loopMBB);
@@ -14177,7 +14183,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(dest)
.addReg(oldval);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE)
+ .addImm(PPC::PRED_NE_MINUS)
.addReg(CrReg)
.addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
@@ -14189,7 +14195,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(ptrA)
.addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE)
+ .addImm(PPC::PRED_NE_MINUS)
.addReg(PPC::CR0)
.addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
@@ -18707,11 +18713,12 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
const Function *F = I->getFunction();
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
+ bool AllowContract = I->getFastMathFlags().allowContract() &&
+ User->getFastMathFlags().allowContract();
- return !(
- isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
}
case Instruction::Load: {
// Don't break "store (load float*)" pattern, this pattern will be combined
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 979ba31..885bed6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2786,14 +2786,16 @@ def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
// these need to be defined after the any_frint versions so ISEL will correctly
// add the chain to the strict versions.
-def : Pat<(f32 (fnearbyint f32:$S)),
+// TODO: Match strict fp rounding intrinsics with instructions like xsrdpiz when
+// rounding mode is propagated to CodeGen part.
+def : Pat<(f32 (strict_fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f64 (fnearbyint f64:$S)),
+def : Pat<(f64 (strict_fnearbyint f64:$S)),
(f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (fnearbyint v2f64:$S)),
+def : Pat<(v2f64 (strict_fnearbyint v2f64:$S)),
(v2f64 (XVRDPIC $S))>;
-def : Pat<(v4f32 (fnearbyint v4f32:$S)),
+def : Pat<(v4f32 (strict_fnearbyint v4f32:$S)),
(v4f32 (XVRSPIC $S))>;
// Materialize a zero-vector of long long
@@ -3578,25 +3580,25 @@ def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
// Rounding to integer.
-def : Pat<(i64 (lrint f64:$S)),
+def : Pat<(i64 (strict_lrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (lrint f32:$S)),
+def : Pat<(i64 (strict_lrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (llrint f64:$S)),
+def : Pat<(i64 (strict_llrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (llrint f32:$S)),
+def : Pat<(i64 (strict_llrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (lround f64:$S)),
+def : Pat<(i64 (strict_lround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (lround f32:$S)),
+def : Pat<(i64 (strict_lround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i32 (lround f64:$S)),
+def : Pat<(i32 (strict_lround f64:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI $S))))>;
-def : Pat<(i32 (lround f32:$S)),
+def : Pat<(i32 (strict_lround f32:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i64 (llround f64:$S)),
+def : Pat<(i64 (strict_llround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (llround f32:$S)),
+def : Pat<(i64 (strict_llround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index e857b2d..edde7ac 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2406,7 +2406,8 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
}
bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) {
- if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa))
+ if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa) ||
+ STI->hasFeature(RISCV::FeatureVendorXSfvfbfexp16e))
return Error(
ErrorLoc,
"operand must be "
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index b8ec0bb..4bea4c4 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -654,7 +654,10 @@ static constexpr FeatureBitset XqciFeatureGroup = {
static constexpr FeatureBitset XSfVectorGroup = {
RISCV::FeatureVendorXSfvcp, RISCV::FeatureVendorXSfvqmaccdod,
RISCV::FeatureVendorXSfvqmaccqoq, RISCV::FeatureVendorXSfvfwmaccqqq,
- RISCV::FeatureVendorXSfvfnrclipxfqf, RISCV::FeatureVendorXSfmmbase};
+ RISCV::FeatureVendorXSfvfnrclipxfqf, RISCV::FeatureVendorXSfmmbase,
+ RISCV::FeatureVendorXSfvfexpa, RISCV::FeatureVendorXSfvfexpa64e,
+ RISCV::FeatureVendorXSfvfbfexp16e, RISCV::FeatureVendorXSfvfexp16e,
+ RISCV::FeatureVendorXSfvfexp32e};
static constexpr FeatureBitset XSfSystemGroup = {
RISCV::FeatureVendorXSiFivecdiscarddlone,
RISCV::FeatureVendorXSiFivecflushdlone,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 50f5a5d..7b9c4b3 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -220,7 +220,8 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED ||
RISCVVType::getSEW(Imm) > 64 ||
(RISCVVType::isAltFmt(Imm) &&
- !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) ||
+ !(STI.hasFeature(RISCV::FeatureStdExtZvfbfa) ||
+ STI.hasFeature(RISCV::FeatureVendorXSfvfbfexp16e))) ||
(Imm >> 9) != 0) {
O << formatImm(Imm);
return;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 5dd4bf4..98b636e 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicSwap64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAdd64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadSub64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAnd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadAnd64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadOr32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
+ case RISCV::PseudoAtomicLoadOr64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
+ case RISCV::PseudoAtomicLoadXor32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadXor64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
+ NextMBBI);
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
NextMBBI);
case RISCV::PseudoAtomicLoadNand64:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
NextMBBI);
+ case RISCV::PseudoAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMin64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadMax64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMin64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadUMax64:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
+ NextMBBI);
case RISCV::PseudoMaskedAtomicSwap32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
NextMBBI);
@@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::And:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Or:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Xor:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
case AtomicRMWInst::Nand:
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -433,38 +521,85 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
.addReg(ShamtReg);
}
-bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI) {
- assert(IsMasked == true &&
- "Should only need to expand masked atomic max/min");
- assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+static void doAtomicMinMaxOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+ MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
- MachineFunction *MF = MBB.getParent();
- auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ // .loophead:
+ // lr.[w|d] dest, (addr)
+ // mv scratch, dest
+ // ifnochangeneeded scratch, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(DestReg)
+ .addImm(0);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(ScratchReg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(ScratchReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(ScratchReg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(ScratchReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
- // Insert new MBBs.
- MF->insert(++MBB.getIterator(), LoopHeadMBB);
- MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
- MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
- MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+ // .loopifbody:
+ // mv scratch, incr
+ BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
- // Set up successors and transfer remaining instructions to DoneMBB.
- LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
- LoopHeadMBB->addSuccessor(LoopTailMBB);
- LoopIfBodyMBB->addSuccessor(LoopTailMBB);
- LoopTailMBB->addSuccessor(LoopHeadMBB);
- LoopTailMBB->addSuccessor(DoneMBB);
- DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
- DoneMBB->transferSuccessors(&MBB);
- MBB.addSuccessor(LoopHeadMBB);
+ // .looptail:
+ // sc.[w|d] scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(AddrReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+}
+static void doMaskedAtomicMinMaxOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+ MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+ const RISCVSubtarget *STI) {
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
Register DestReg = MI.getOperand(0).getReg();
Register Scratch1Reg = MI.getOperand(1).getReg();
Register Scratch2Reg = MI.getOperand(2).getReg();
@@ -541,6 +676,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
.addReg(Scratch1Reg)
.addReg(RISCV::X0)
.addMBB(LoopHeadMBB);
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ if (!IsMasked)
+ doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
+ LoopTailMBB, DoneMBB, BinOp, Width, STI);
+ else
+ doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
+ LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
+ Width, STI);
NextMBBI = MBB.end();
MI.eraseFromParent();
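
The unmasked expansions above all share the LR/SC retry-loop shape sketched in the block comments. As a rough C-level model of the signed-max variant (lr_w and sc_w are hypothetical stand-ins for the LR.W/SC.W instructions; sc_w returning nonzero models the failed store-conditional that the BNE retries on):

    #include <cstdint>

    // Hypothetical models of LR.W/SC.W: lr_w starts a reservation and
    // returns the current value; sc_w returns nonzero if the store failed.
    extern int32_t lr_w(int32_t *addr);
    extern int sc_w(int32_t *addr, int32_t val);

    int32_t atomic_max_lrsc(int32_t *addr, int32_t incr) {
      int32_t dest, scratch;
      do {
        dest = lr_w(addr);       // .loophead:   lr.w dest, (addr)
        scratch = dest;          //              mv   scratch, dest
        if (!(scratch >= incr))  //              bge  scratch, incr, .looptail
          scratch = incr;        // .loopifbody: mv   scratch, incr
      } while (sc_w(addr, scratch) != 0); // .looptail: sc.w + bnez
      return dest;               // the prior memory value is the RMW result
    }
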
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 19992e6..9e6b7f0 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -218,6 +218,7 @@ def HasStdExtZaamo
: Predicate<"Subtarget->hasStdExtZaamo()">,
AssemblerPredicate<(any_of FeatureStdExtZaamo),
"'Zaamo' (Atomic Memory Operations)">;
+def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">;
def FeatureStdExtZalrsc
: RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">;
@@ -1334,6 +1335,44 @@ def HasVendorXSfvfnrclipxfqf
AssemblerPredicate<(all_of FeatureVendorXSfvfnrclipxfqf),
"'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)">;
+// Note: XSfvfbfexp16e depends on either Zvfbfmin _or_ Zvfbfa, which cannot
+// be expressed here in TableGen. Instead, we check that in RISCVISAInfo.
+def FeatureVendorXSfvfbfexp16e
+ : RISCVExtension<0, 5,
+ "SiFive Vector Floating-Point Exponential Function Instruction, BFloat16">;
+def HasVendorXSfvfbfexp16e : Predicate<"Subtarget->hasVendorXSfvfbfexp16e()">;
+
+def FeatureVendorXSfvfexp16e
+ : RISCVExtension<0, 5,
+ "SiFive Vector Floating-Point Exponential Function Instruction, Half Precision",
+ [FeatureStdExtZvfh]>;
+def HasVendorXSfvfexp16e : Predicate<"Subtarget->hasVendorXSfvfexp16e()">;
+
+def FeatureVendorXSfvfexp32e
+ : RISCVExtension<0, 5,
+ "SiFive Vector Floating-Point Exponential Function Instruction, Single Precision",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSfvfexp32e : Predicate<"Subtarget->hasVendorXSfvfexp32e()">;
+
+def HasVendorXSfvfexpAnyFloat
+    : Predicate<"Subtarget->hasVendorXSfvfexp16e() || Subtarget->hasVendorXSfvfexp32e()">;
+def HasVendorXSfvfexpAny
+    : Predicate<"Subtarget->hasVendorXSfvfbfexp16e() || Subtarget->hasVendorXSfvfexp16e() || Subtarget->hasVendorXSfvfexp32e()">,
+      AssemblerPredicate<(any_of FeatureVendorXSfvfbfexp16e,
+                                 FeatureVendorXSfvfexp16e,
+                                 FeatureVendorXSfvfexp32e),
+                         "'Xsfvfbfexp16e', 'Xsfvfexp16e', or 'Xsfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction)">;
+
+def FeatureVendorXSfvfexpa
+ : RISCVExtension<0, 2,
+ "SiFive Vector Floating-Point Exponential Approximation Instruction",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSfvfexpa : Predicate<"Subtarget->hasVendorXSfvfexpa()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvfexpa),
+ "'Xsfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction)">;
+
+def FeatureVendorXSfvfexpa64e
+ : RISCVExtension<0, 2,
+ "SiFive Vector Floating-Point Exponential Approximation Instruction with Double-Precision",
+ [FeatureVendorXSfvfexpa, FeatureStdExtZve64d]>;
+def HasVendorXSfvfexpa64e : Predicate<"Subtarget->hasVendorXSfvfexpa64e()">;
+
def FeatureVendorXSiFivecdiscarddlone
: RISCVExtension<1, 0,
"SiFive sf.cdiscard.d.l1 Instruction", []>;
@@ -1864,7 +1903,7 @@ def FeatureForcedAtomics : SubtargetFeature<
"forced-atomics", "HasForcedAtomics", "true",
"Assume that lock-free native-width atomics are available">;
def HasAtomicLdSt
- : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+ : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;
def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 169465e..26fe9ed 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
else if (Subtarget.hasStdExtZicbop())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
- if (Subtarget.hasStdExtA()) {
+ if (Subtarget.hasStdExtZalrsc()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
setMinCmpXchgSizeInBits(8);
@@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtZaamo())
setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
if (Subtarget.hasForcedAtomics()) {
@@ -12649,10 +12649,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
// Reassemble the low and high pieces reversed.
- // FIXME: This is a CONCAT_VECTORS.
- SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
- return DAG.getInsertSubvector(DL, Res, Lo,
- LoVT.getVectorMinNumElements());
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
}
// Just promote the int type to i16 which will double the LMUL.
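
The CONCAT_VECTORS form makes the recombination easy to verify with a small example: reversing <a,b,c,d> splits it into Lo = <a,b> and Hi = <c,d>, the recursive reverses produce <b,a> and <d,c>, and concatenating reversed Hi ahead of reversed Lo yields <d,c,b,a>, the full reverse.
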
@@ -21878,7 +21875,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
// result is then sign extended to XLEN. With +A, the minimum width is
// 32 for both 64 and 32.
assert(getMinCmpXchgSizeInBits() == 32);
- assert(Subtarget.hasStdExtA());
+ assert(Subtarget.hasStdExtZalrsc());
return Op.getValueSizeInBits() - 31;
}
break;
@@ -24047,18 +24044,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- std::pair<Register, const TargetRegisterClass *> Res =
- TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
-
- // If we picked one of the Zfinx register classes, remap it to the GPR class.
- // FIXME: When Zfinx is supported in CodeGen this will need to take the
- // Subtarget into account.
- if (Res.second == &RISCV::GPRF16RegClass ||
- Res.second == &RISCV::GPRF32RegClass ||
- Res.second == &RISCV::GPRPairRegClass)
- return std::make_pair(Res.first, &RISCV::GPRRegClass);
-
- return Res;
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
InlineAsm::ConstraintCode
@@ -24485,6 +24471,25 @@ ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
+ISD::NodeType RISCVTargetLowering::getExtendForAtomicRMWArg(unsigned Op) const {
+ // Zaamo will use amo<op>.w which does not require extension.
+ if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
+ return ISD::ANY_EXTEND;
+
+ // Zalrsc pseudo expansions with comparison require sign-extension.
+ assert(Subtarget.hasStdExtZalrsc());
+ switch (Op) {
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return ISD::SIGN_EXTEND;
+ default:
+ break;
+ }
+ return ISD::ANY_EXTEND;
+}
+
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
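
The SIGN_EXTEND requirement in getExtendForAtomicRMWArg is easiest to see with concrete values: the Zalrsc min/max expansion compares full XLEN-wide registers with BGE/BGEU, so on RV64 an i32 operand that is merely any-extended can flip a signed comparison. A minimal sketch of the failure mode, modeling the arbitrary upper bits of an any-extend as zero-extension:

    #include <cstdint>

    // max(1, -1) should be 1, but a 64-bit signed compare of zero-extended
    // i32 inputs sees 1 vs 0xFFFFFFFF and picks -1 instead.
    int32_t badSignedMax(int32_t a, int32_t b) {
      int64_t A = (uint32_t)a; // zero-extended, not sign-extended
      int64_t B = (uint32_t)b;
      return A >= B ? a : b;   // badSignedMax(1, -1) == -1
    }
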
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed7..9e3e2a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ public:
}
ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+ ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 12f776b..912b82d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1689,42 +1689,44 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
+ // clang-format off
switch (Opcode) {
- case RISCV::ADD: return RISCV::PseudoCCADD; break;
- case RISCV::SUB: return RISCV::PseudoCCSUB; break;
- case RISCV::SLL: return RISCV::PseudoCCSLL; break;
- case RISCV::SRL: return RISCV::PseudoCCSRL; break;
- case RISCV::SRA: return RISCV::PseudoCCSRA; break;
- case RISCV::AND: return RISCV::PseudoCCAND; break;
- case RISCV::OR: return RISCV::PseudoCCOR; break;
- case RISCV::XOR: return RISCV::PseudoCCXOR; break;
-
- case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
- case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
- case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
- case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
- case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
- case RISCV::ORI: return RISCV::PseudoCCORI; break;
- case RISCV::XORI: return RISCV::PseudoCCXORI; break;
-
- case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
- case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
- case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
- case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
- case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
-
- case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
- case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
- case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
- case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
-
- case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
- case RISCV::ORN: return RISCV::PseudoCCORN; break;
- case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
-
- case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS; break;
- case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ; break;
+ case RISCV::ADD: return RISCV::PseudoCCADD;
+ case RISCV::SUB: return RISCV::PseudoCCSUB;
+ case RISCV::SLL: return RISCV::PseudoCCSLL;
+ case RISCV::SRL: return RISCV::PseudoCCSRL;
+ case RISCV::SRA: return RISCV::PseudoCCSRA;
+ case RISCV::AND: return RISCV::PseudoCCAND;
+ case RISCV::OR: return RISCV::PseudoCCOR;
+ case RISCV::XOR: return RISCV::PseudoCCXOR;
+
+ case RISCV::ADDI: return RISCV::PseudoCCADDI;
+ case RISCV::SLLI: return RISCV::PseudoCCSLLI;
+ case RISCV::SRLI: return RISCV::PseudoCCSRLI;
+ case RISCV::SRAI: return RISCV::PseudoCCSRAI;
+ case RISCV::ANDI: return RISCV::PseudoCCANDI;
+ case RISCV::ORI: return RISCV::PseudoCCORI;
+ case RISCV::XORI: return RISCV::PseudoCCXORI;
+
+ case RISCV::ADDW: return RISCV::PseudoCCADDW;
+ case RISCV::SUBW: return RISCV::PseudoCCSUBW;
+ case RISCV::SLLW: return RISCV::PseudoCCSLLW;
+ case RISCV::SRLW: return RISCV::PseudoCCSRLW;
+ case RISCV::SRAW: return RISCV::PseudoCCSRAW;
+
+ case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
+ case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
+ case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
+ case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;
+
+ case RISCV::ANDN: return RISCV::PseudoCCANDN;
+ case RISCV::ORN: return RISCV::PseudoCCORN;
+ case RISCV::XNOR: return RISCV::PseudoCCXNOR;
+
+ case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS;
+ case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ;
}
+ // clang-format on
return RISCV::INSTRUCTION_LIST_END;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 571d72f..5c81a09 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -158,9 +158,9 @@ class seq_cst_store<PatFrag base>
}
} // IsAtomic = 1
-// Atomic load/store are available under both +a and +force-atomics.
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+// Atomic load/store are available under +zalrsc (thus also +a) and
+// +force-atomics. Fences will be inserted for atomic load/stores according to
+// the logic in RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
// The normal loads/stores are relaxed (unordered) loads/stores that don't have
// any ordering. This is necessary because AtomicExpandPass has added fences to
// atomic load/stores and changed them to unordered ones.
@@ -308,7 +308,65 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZaamo] in {
+
+let Size = 16 in {
+def PseudoAtomicSwap32 : PseudoAMO;
+def PseudoAtomicLoadAdd32 : PseudoAMO;
+def PseudoAtomicLoadSub32 : PseudoAMO;
+def PseudoAtomicLoadAnd32 : PseudoAMO;
+def PseudoAtomicLoadOr32 : PseudoAMO;
+def PseudoAtomicLoadXor32 : PseudoAMO;
+} // Size = 16
+let Size = 24 in {
+def PseudoAtomicLoadMax32 : PseudoAMO;
+def PseudoAtomicLoadMin32 : PseudoAMO;
+def PseudoAtomicLoadUMax32 : PseudoAMO;
+def PseudoAtomicLoadUMin32 : PseudoAMO;
+} // Size = 24
+
+defm : PseudoAMOPat<"atomic_swap_i32", PseudoAtomicSwap32>;
+defm : PseudoAMOPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
+defm : PseudoAMOPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
+defm : PseudoAMOPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
+defm : PseudoAMOPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
+defm : PseudoAMOPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoAMOPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoAMOPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
+defm : PseudoAMOPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoAMOPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo]
+
+let Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64] in {
+
+let Size = 16 in {
+def PseudoAtomicSwap64 : PseudoAMO;
+def PseudoAtomicLoadAdd64 : PseudoAMO;
+def PseudoAtomicLoadSub64 : PseudoAMO;
+def PseudoAtomicLoadAnd64 : PseudoAMO;
+def PseudoAtomicLoadOr64 : PseudoAMO;
+def PseudoAtomicLoadXor64 : PseudoAMO;
+} // Size = 16
+let Size = 24 in {
+def PseudoAtomicLoadMax64 : PseudoAMO;
+def PseudoAtomicLoadMin64 : PseudoAMO;
+def PseudoAtomicLoadUMax64 : PseudoAMO;
+def PseudoAtomicLoadUMin64 : PseudoAMO;
+} // Size = 24
+
+defm : PseudoAMOPat<"atomic_swap_i64", PseudoAtomicSwap64, i64>;
+defm : PseudoAMOPat<"atomic_load_add_i64", PseudoAtomicLoadAdd64, i64>;
+defm : PseudoAMOPat<"atomic_load_sub_i64", PseudoAtomicLoadSub64, i64>;
+defm : PseudoAMOPat<"atomic_load_and_i64", PseudoAtomicLoadAnd64, i64>;
+defm : PseudoAMOPat<"atomic_load_or_i64", PseudoAtomicLoadOr64, i64>;
+defm : PseudoAMOPat<"atomic_load_xor_i64", PseudoAtomicLoadXor64, i64>;
+defm : PseudoAMOPat<"atomic_load_max_i64", PseudoAtomicLoadMax64, i64>;
+defm : PseudoAMOPat<"atomic_load_min_i64", PseudoAtomicLoadMin64, i64>;
+defm : PseudoAMOPat<"atomic_load_umax_i64", PseudoAtomicLoadUMax64, i64>;
+defm : PseudoAMOPat<"atomic_load_umin_i64", PseudoAtomicLoadUMin64, i64>;
+} // Predicates = [HasStdExtZalrsc, NoStdExtZaamo, IsRV64]
+
+let Predicates = [HasStdExtZalrsc] in {
let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
@@ -347,14 +405,14 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax,
PseudoMaskedAtomicLoadUMax32>;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin,
PseudoMaskedAtomicLoadUMin32>;
-} // Predicates = [HasStdExtA]
+} // Predicates = [HasStdExtZalrsc]
-let Predicates = [HasStdExtA, IsRV64] in {
+let Predicates = [HasStdExtZalrsc, IsRV64] in {
let Size = 20 in
def PseudoAtomicLoadNand64 : PseudoAMO;
defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>;
-} // Predicates = [HasStdExtA, IsRV64]
+} // Predicates = [HasStdExtZalrsc, IsRV64]
/// Compare and exchange
@@ -385,17 +443,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
-let Predicates = [HasStdExtA, NoStdExtZacas] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZacas] in {
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
}
-let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in {
+let Predicates = [HasStdExtZalrsc, NoStdExtZacas, IsRV64] in {
def PseudoCmpXchg64 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
}
-let Predicates = [HasStdExtA] in {
+let Predicates = [HasStdExtZalrsc] in {
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
@@ -412,4 +470,4 @@ def : Pat<(XLenVT (int_riscv_masked_cmpxchg
(XLenVT GPR:$mask), (XLenVT timm:$ordering))),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-} // Predicates = [HasStdExtA]
+} // Predicates = [HasStdExtZalrsc]
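
The Size fields above follow directly from the expansions: the simple binop pseudos lower to four 4-byte instructions (lr, op, sc, bne), hence Size = 16; the min/max pseudos add a conditional branch and an extra move (lr, mv, bge/bgeu, mv, sc, bne), hence Size = 24; and the pre-existing Nand pseudos sit in between at Size = 20, since the NAND needs two ALU instructions (lr, and, xori, sc, bne).
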
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 6a4119a..4104abd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -217,6 +217,14 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>;
}
+let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in {
+ def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">;
+}
+
+let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in {
+ def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">;
+}
+
let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector",
DestEEW = EEWSEWx4, RVVConstraint=VS2Constraint in {
def SF_VQMACCU_2x8x2 : CustomSiFiveVMACC<0b101100, OPMVV, "sf.vqmaccu.2x8x2">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 40d7341..62b7bcd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -527,8 +527,8 @@ def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
let Predicates = [HasStdExtZbs] in {
def : Pat<(XLenVT (and (not (shl 1, shiftMaskXLen:$rs2)), GPR:$rs1)),
(BCLR GPR:$rs1, shiftMaskXLen:$rs2)>;
-def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
- (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (and (rotl -2, shiftMaskXLen:$rs2), GPR:$rs1)),
+ (BCLR GPR:$rs1, shiftMaskXLen:$rs2)>;
def : Pat<(XLenVT (or (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)),
(BSET GPR:$rs1, shiftMaskXLen:$rs2)>;
def : Pat<(XLenVT (xor (shl 1, shiftMaskXLen:$rs2), GPR:$rs1)),
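
The corrected BCLR pattern relies on the identity rotl(-2, n) == ~(1 << n): rotating the all-ones-except-bit-zero constant left by n moves the lone zero bit to position n, which is exactly the mask BCLR clears. The fix constrains the rotate amount with shiftMaskXLen on both sides of the pattern, since the identity (and the instruction) only hold for properly masked shift amounts. A self-contained check of the identity for XLEN = 32:

    #include <cassert>
    #include <cstdint>

    uint32_t rotl32(uint32_t v, unsigned n) {
      return (v << n) | (v >> ((32 - n) & 31)); // & 31 avoids UB at n == 0
    }

    int main() {
      for (unsigned n = 0; n < 32; ++n)
        assert(rotl32(~1u, n) == ~(1u << n)); // -2 rotated by n == BCLR mask
      return 0;
    }
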
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 5591d9f..021353a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -355,9 +355,9 @@ private:
SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const;
bool extractSubvector(Register &ResVReg, const SPIRVType *ResType,
Register &ReadReg, MachineInstr &InsertionPoint) const;
- bool generateImageRead(Register &ResVReg, const SPIRVType *ResType,
- Register ImageReg, Register IdxReg, DebugLoc Loc,
- MachineInstr &Pos) const;
+ bool generateImageReadOrFetch(Register &ResVReg, const SPIRVType *ResType,
+ Register ImageReg, Register IdxReg,
+ DebugLoc Loc, MachineInstr &Pos) const;
bool BuildCOPY(Register DestReg, Register SrcReg, MachineInstr &I) const;
bool loadVec3BuiltinInputID(SPIRV::BuiltIn::BuiltIn BuiltInValue,
Register ResVReg, const SPIRVType *ResType,
@@ -1321,8 +1321,8 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
Register IdxReg = IntPtrDef->getOperand(3).getReg();
- return generateImageRead(ResVReg, ResType, NewHandleReg, IdxReg,
- I.getDebugLoc(), I);
+ return generateImageReadOrFetch(ResVReg, ResType, NewHandleReg, IdxReg,
+ I.getDebugLoc(), I);
}
}
@@ -3639,27 +3639,33 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(
DebugLoc Loc = I.getDebugLoc();
MachineInstr &Pos = I;
- return generateImageRead(ResVReg, ResType, NewImageReg, IdxReg, Loc, Pos);
+ return generateImageReadOrFetch(ResVReg, ResType, NewImageReg, IdxReg, Loc,
+ Pos);
}
-bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg,
- const SPIRVType *ResType,
- Register ImageReg,
- Register IdxReg, DebugLoc Loc,
- MachineInstr &Pos) const {
+bool SPIRVInstructionSelector::generateImageReadOrFetch(
+ Register &ResVReg, const SPIRVType *ResType, Register ImageReg,
+ Register IdxReg, DebugLoc Loc, MachineInstr &Pos) const {
SPIRVType *ImageType = GR.getSPIRVTypeForVReg(ImageReg);
assert(ImageType && ImageType->getOpcode() == SPIRV::OpTypeImage &&
"ImageReg is not an image type.");
+
bool IsSignedInteger =
sampledTypeIsSignedInteger(GR.getTypeForSPIRVType(ImageType));
+ // Check if the "sampled" operand of the image type is 1.
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpImageFetch
+ auto SampledOp = ImageType->getOperand(6);
+ bool IsFetch = (SampledOp.getImm() == 1);
uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
if (ResultSize == 4) {
- auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(ImageReg)
- .addUse(IdxReg);
+ auto BMI =
+ BuildMI(*Pos.getParent(), Pos, Loc,
+ TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(ImageReg)
+ .addUse(IdxReg);
if (IsSignedInteger)
BMI.addImm(0x1000); // SignExtend
@@ -3668,11 +3674,13 @@ bool SPIRVInstructionSelector::generateImageRead(Register &ResVReg,
SPIRVType *ReadType = widenTypeToVec4(ResType, Pos);
Register ReadReg = MRI->createVirtualRegister(GR.getRegClass(ReadType));
- auto BMI = BuildMI(*Pos.getParent(), Pos, Loc, TII.get(SPIRV::OpImageRead))
- .addDef(ReadReg)
- .addUse(GR.getSPIRVTypeID(ReadType))
- .addUse(ImageReg)
- .addUse(IdxReg);
+ auto BMI =
+ BuildMI(*Pos.getParent(), Pos, Loc,
+ TII.get(IsFetch ? SPIRV::OpImageFetch : SPIRV::OpImageRead))
+ .addDef(ReadReg)
+ .addUse(GR.getSPIRVTypeID(ReadType))
+ .addUse(ImageReg)
+ .addUse(IdxReg);
if (IsSignedInteger)
BMI.addImm(0x1000); // SignExtend
bool Succeed = BMI.constrainAllUses(TII, TRI, RBI);
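
The IsFetch test reads operand 6 of the OpTypeImage instruction, which is the Sampled operand in the SPIR-V encoding (result, sampled type, Dim, Depth, Arrayed, MS, Sampled, Format). In the spec, 1 means the image will be used with a sampler, so it must be accessed via OpImageFetch; 0 (known only at run time) and 2 (storage image) keep the OpImageRead path. A minimal sketch of the rule:

    #include <cstdint>

    enum class ImageAccessOp { Fetch, Read };

    // 'Sampled' is the immediate of OpTypeImage operand 6, as extracted in
    // generateImageReadOrFetch above.
    ImageAccessOp chooseAccessOp(int64_t Sampled) {
      return Sampled == 1 ? ImageAccessOp::Fetch : ImageAccessOp::Read;
    }
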
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index cf85691..9bda8a4 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -158,7 +158,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
Options.X = F.getFnAttribute(Y).getValueAsBool(); \
} while (0)
- RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b5f8ee5..b54a1e7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29516,11 +29516,8 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
if (IgnoreNaN || DAG.isKnownNeverNaN(IsNum ? NewY : NewX))
return MinMax;
- if (DAG.isKnownNeverNaN(NewX))
- NewX = NewY;
-
- SDValue IsNaN =
- DAG.getSetCC(DL, SetCCType, NewX, NewX, IsNum ? ISD::SETO : ISD::SETUO);
+ SDValue NaNSrc = IsNum ? MinMax : NewX;
+ SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NaNSrc, NaNSrc, ISD::SETUO);
return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);
}
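
Both the old and new forms of this lowering rest on the unordered self-comparison idiom: a SETUO compare of a value against itself is true exactly when the value is NaN. The change only redirects which value gets tested (the min/max result for the IsNum flavors, NewX otherwise). The scalar analogue of that idiom:

    // A value compares unequal to itself iff it is NaN; this mirrors the
    // SETUO self-compare node built above.
    bool isNaNLike(double v) { return v != v; }
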
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 7882045..0fce5b9 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -567,8 +567,8 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT) {
default:
if (TT.isOSNetBSD())
return "apcs-gnu";
- if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() ||
- TT.isOHOSFamily())
+ if (TT.isOSFreeBSD() || TT.isOSFuchsia() || TT.isOSOpenBSD() ||
+ TT.isOSHaiku() || TT.isOHOSFamily())
return "aapcs-linux";
return "aapcs";
}
@@ -648,6 +648,8 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
}
case llvm::Triple::OpenBSD:
return "cortex-a8";
+ case llvm::Triple::Fuchsia:
+ return "cortex-a53";
default:
switch (Triple.getEnvironment()) {
case llvm::Triple::EABIHF:
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 31126cc..f08a0c0 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -765,6 +765,12 @@ Error RISCVISAInfo::checkDependency() {
if (HasZvl && !HasVector)
return getExtensionRequiresError("zvl*b", "v' or 'zve*");
+ if (Exts.count("xsfvfbfexp16e") &&
+ !(Exts.count("zvfbfmin") || Exts.count("zvfbfa")))
+ return createStringError(errc::invalid_argument,
+ "'xsfvfbfexp16e' requires 'zvfbfmin' or "
+ "'zvfbfa' extension to also be specified");
+
if (HasD && (HasC || Exts.count("zcd")))
for (auto Ext : ZcdOverlaps)
if (Exts.count(Ext.str()))
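
Because the Zvfbfmin-or-Zvfbfa requirement cannot be encoded as a TableGen implication, it is enforced here at ISA-string validation time. A hedged sketch of how the diagnostic would surface through the parser API (the exact arch string, including the 0p5 version suffix, is an assumption for illustration):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TargetParser/RISCVISAInfo.h"

    using namespace llvm;

    void demo() {
      // Missing zvfbfmin/zvfbfa should trigger the dependency error above.
      auto Res = RISCVISAInfo::parseArchString(
          "rv64i_xsfvfbfexp16e0p5", /*EnableExperimentalExtension=*/true);
      if (!Res)
        errs() << toString(Res.takeError()) << "\n";
    }
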
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 28ee444..a29faab 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -1368,13 +1368,13 @@ static void ComputeCrossModuleImportForModuleFromIndexForTest(
FunctionImporter::ImportMapTy &ImportList) {
for (const auto &GlobalList : Index) {
// Ignore entries for undefined references.
- if (GlobalList.second.SummaryList.empty())
+ if (GlobalList.second.getSummaryList().empty())
continue;
auto GUID = GlobalList.first;
- assert(GlobalList.second.SummaryList.size() == 1 &&
+ assert(GlobalList.second.getSummaryList().size() == 1 &&
"Expected individual combined index to have one summary per GUID");
- auto &Summary = GlobalList.second.SummaryList[0];
+ auto &Summary = GlobalList.second.getSummaryList()[0];
// Skip the summaries for the importing module. These are included to
// e.g. record required linkage changes.
if (Summary->modulePath() == ModulePath)
@@ -1423,7 +1423,7 @@ void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
for (const auto &Entry : Index) {
- for (const auto &S : Entry.second.SummaryList) {
+ for (const auto &S : Entry.second.getSummaryList()) {
if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
updateValueInfoForIndirectCalls(Index, FS);
}
@@ -1456,7 +1456,7 @@ void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
// Add values flagged in the index as live roots to the worklist.
for (const auto &Entry : Index) {
auto VI = Index.getValueInfo(Entry);
- for (const auto &S : Entry.second.SummaryList) {
+ for (const auto &S : Entry.second.getSummaryList()) {
if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
updateValueInfoForIndirectCalls(Index, FS);
if (S->isLive()) {
@@ -2094,7 +2094,7 @@ static bool doImportingForModuleForTest(
// is only enabled when testing importing via the 'opt' tool, which does
// not do the ThinLink that would normally determine what values to promote.
for (auto &I : *Index) {
- for (auto &S : I.second.SummaryList) {
+ for (auto &S : I.second.getSummaryList()) {
if (GlobalValue::isLocalLinkage(S->linkage()))
S->setLinkage(GlobalValue::ExternalLinkage);
}
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index be6cba3..aa1346d 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1271,7 +1271,7 @@ bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
// the module flags.
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("branch-target-enforcement")))
- HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
+ HasBranchTargetEnforcement = !BTE->isZero();
else
HasBranchTargetEnforcement = 0;
}
@@ -2130,7 +2130,7 @@ bool LowerTypeTestsModule::lower() {
// A set of all functions that are address taken by a live global object.
DenseSet<GlobalValue::GUID> AddressTaken;
for (auto &I : *ExportSummary)
- for (auto &GVS : I.second.SummaryList)
+ for (auto &GVS : I.second.getSummaryList())
if (GVS->isLive())
for (const auto &Ref : GVS->refs()) {
AddressTaken.insert(Ref.getGUID());
@@ -2409,7 +2409,7 @@ bool LowerTypeTestsModule::lower() {
}
for (auto &P : *ExportSummary) {
- for (auto &S : P.second.SummaryList) {
+ for (auto &S : P.second.getSummaryList()) {
if (!ExportSummary->isGlobalValueLive(S.get()))
continue;
if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 2d5cb82..76e588b 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -928,7 +928,7 @@ void llvm::updateVCallVisibilityInIndex(
// linker, as we have no information on their eventual use.
if (DynamicExportSymbols.count(P.first))
continue;
- for (auto &S : P.second.SummaryList) {
+ for (auto &S : P.second.getSummaryList()) {
auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
if (!GVar ||
GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
@@ -2413,7 +2413,7 @@ bool DevirtModule::run() {
}
for (auto &P : *ExportSummary) {
- for (auto &S : P.second.SummaryList) {
+ for (auto &S : P.second.getSummaryList()) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
if (!FS)
continue;
@@ -2564,7 +2564,7 @@ void DevirtIndex::run() {
// Collect information from summary about which calls to try to devirtualize.
for (auto &P : ExportSummary) {
- for (auto &S : P.second.SummaryList) {
+ for (auto &S : P.second.getSummaryList()) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
if (!FS)
continue;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index cdc559b..9b9fe26 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1643,33 +1643,46 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
/// Return a Constant* for the specified floating-point constant if it fits
/// in the specified FP type without changing its value.
-static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
bool losesInfo;
- APFloat F = CFP->getValueAPF();
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
return !losesInfo;
}
-static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
- if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
- return nullptr; // No constant folding of this.
+static Type *shrinkFPConstant(LLVMContext &Ctx, const APFloat &F,
+ bool PreferBFloat) {
// See if the value can be truncated to bfloat and then reextended.
- if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
- return Type::getBFloatTy(CFP->getContext());
+ if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
+ return Type::getBFloatTy(Ctx);
// See if the value can be truncated to half and then reextended.
- if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
- return Type::getHalfTy(CFP->getContext());
+ if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
+ return Type::getHalfTy(Ctx);
// See if the value can be truncated to float and then reextended.
- if (fitsInFPType(CFP, APFloat::IEEEsingle()))
- return Type::getFloatTy(CFP->getContext());
- if (CFP->getType()->isDoubleTy())
- return nullptr; // Won't shrink.
- if (fitsInFPType(CFP, APFloat::IEEEdouble()))
- return Type::getDoubleTy(CFP->getContext());
+ if (fitsInFPType(F, APFloat::IEEEsingle()))
+ return Type::getFloatTy(Ctx);
+ if (&F.getSemantics() == &APFloat::IEEEdouble())
+ return nullptr; // Won't shrink.
+ // See if the value can be truncated to double and then reextended.
+ if (fitsInFPType(F, APFloat::IEEEdouble()))
+ return Type::getDoubleTy(Ctx);
// Don't try to shrink to various long double types.
return nullptr;
}
+static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
+ Type *Ty = CFP->getType();
+ if (Ty->getScalarType()->isPPC_FP128Ty())
+ return nullptr; // No constant folding of this.
+
+ Type *ShrinkTy =
+ shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
+ if (ShrinkTy)
+ if (auto *VecTy = dyn_cast<VectorType>(Ty))
+ ShrinkTy = VectorType::get(ShrinkTy, VecTy);
+
+ return ShrinkTy;
+}
+
// Determine if this is a vector of ConstantFPs and if so, return the minimal
// type we can safely truncate all elements to.
static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
@@ -1720,10 +1733,10 @@ static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
// Try to shrink scalable and fixed splat vectors.
if (auto *FPC = dyn_cast<Constant>(V))
- if (isa<VectorType>(V->getType()))
+ if (auto *VTy = dyn_cast<VectorType>(V->getType()))
if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
- return T;
+ return VectorType::get(T, VTy);
// Try to shrink a vector of FP constants. This returns nullptr on scalable
// vectors
@@ -1796,10 +1809,9 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
Type *Ty = FPT.getType();
auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
if (BO && BO->hasOneUse()) {
- Type *LHSMinType =
- getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
- Type *RHSMinType =
- getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
+ bool PreferBFloat = Ty->getScalarType()->isBFloatTy();
+ Type *LHSMinType = getMinimumFPType(BO->getOperand(0), PreferBFloat);
+ Type *RHSMinType = getMinimumFPType(BO->getOperand(1), PreferBFloat);
unsigned OpWidth = BO->getType()->getFPMantissaWidth();
unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
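
The refactor routes every shrink query through an APFloat-level check, so scalar constants, splat vectors, and fixed vectors of constants now share one definition of "fits without loss": a convert round-trip that reports no lost information. The underlying test in isolation:

    #include "llvm/ADT/APFloat.h"

    using namespace llvm;

    // True iff the double narrows to float without changing value, e.g.
    // shrinksToFloat(1.5) is true while shrinksToFloat(0.1) is false.
    bool shrinksToFloat(double D) {
      bool LosesInfo;
      APFloat F(D);
      (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                      &LosesInfo);
      return !LosesInfo;
    }
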
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 975498f..5aa8de3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3455,27 +3455,45 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
// select a, false, b -> select !a, b, false
if (match(TrueVal, m_Specific(Zero))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, FalseVal, Zero);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, FalseVal, Zero, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// select a, b, true -> select !a, true, b
if (match(FalseVal, m_Specific(One))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, One, TrueVal);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, One, TrueVal, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// DeMorgan in select form: !a && !b --> !(a || b)
// select !a, !b, false --> not (select a, true, b)
if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || TrueVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, One, B, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// DeMorgan in select form: !a || !b --> !(a && b)
// select !a, true, !b --> not (select a, b, false)
if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || FalseVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, B, Zero, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// select (select a, true, b), true, b -> select a, true, b
if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) &&
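
The MDFrom plumbing added to these folds exists to keep !prof branch_weights coherent: each rewrite swaps the select's arms while negating its condition, so weights copied from the original must be swapped as well (unless ProfcheckDisableMetadataFixes opts out). The pairing in isolation, as a sketch:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Invert a select, copying !prof from the original (MDFrom = &SI) and
    // then swapping the weights to track the swapped arms.
    SelectInst *invertSelect(SelectInst &SI, Value *NotCond) {
      SelectInst *NewSI = SelectInst::Create(
          NotCond, SI.getFalseValue(), SI.getTrueValue(), "", nullptr, &SI);
      NewSI->swapProfMetadata();
      return NewSI;
    }
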
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 40720ae..0873845 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -31,10 +31,12 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/AllocToken.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -53,29 +55,12 @@
#include <variant>
using namespace llvm;
+using TokenMode = AllocTokenMode;
#define DEBUG_TYPE "alloc-token"
namespace {
-//===--- Constants --------------------------------------------------------===//
-
-enum class TokenMode : unsigned {
- /// Incrementally increasing token ID.
- Increment = 0,
-
- /// Simple mode that returns a statically-assigned random token ID.
- Random = 1,
-
- /// Token ID based on allocated type hash.
- TypeHash = 2,
-
- /// Token ID based on allocated type hash, where the top half ID-space is
- /// reserved for types that contain pointers and the bottom half for types
- /// that do not contain pointers.
- TypeHashPointerSplit = 3,
-};
-
//===--- Command-line options ---------------------------------------------===//
cl::opt<TokenMode> ClMode(
@@ -131,7 +116,7 @@ cl::opt<uint64_t> ClFallbackToken(
//===--- Statistics -------------------------------------------------------===//
-STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumFunctionsModified, "Functions modified");
STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
//===----------------------------------------------------------------------===//
@@ -140,9 +125,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
///
/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenMetadata(const CallBase &CB) {
- MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
- if (!Ret)
- return nullptr;
+ MDNode *Ret = nullptr;
+ if (auto *II = dyn_cast<IntrinsicInst>(&CB);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0));
+ Ret = cast<MDNode>(MDV->getMetadata());
+ // If the intrinsic has an empty MDNode, type inference failed.
+ if (Ret->getNumOperands() == 0)
+ return nullptr;
+ } else {
+ Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
+ if (!Ret)
+ return nullptr;
+ }
assert(Ret->getNumOperands() == 2 && "bad !alloc_token");
assert(isa<MDString>(Ret->getOperand(0)));
assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
@@ -206,22 +201,19 @@ public:
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- const auto [N, H] = getHash(CB, ORE);
- return N ? boundedToken(H) : H;
- }
-protected:
- std::pair<MDNode *, uint64_t> getHash(const CallBase &CB,
- OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
- return {N, getStableSipHash(S->getString())};
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens))
+ return *Token;
}
// Fallback.
remarkNoMetadata(CB, ORE);
- return {nullptr, ClFallbackToken};
+ return ClFallbackToken;
}
+protected:
/// Remark that there was no precise type information.
static void remarkNoMetadata(const CallBase &CB,
OptimizationRemarkEmitter &ORE) {
@@ -242,20 +234,18 @@ public:
using TypeHashMode::TypeHashMode;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- if (MaxTokens == 1)
- return 0;
- const uint64_t HalfTokens = MaxTokens / 2;
- const auto [N, H] = getHash(CB, ORE);
- if (!N) {
- // Pick the fallback token (ClFallbackToken), which by default is 0,
- // meaning it'll fall into the pointer-less bucket. Override by setting
- // -alloc-token-fallback if that is the wrong choice.
- return H;
+ if (MDNode *N = getAllocTokenMetadata(CB)) {
+ MDString *S = cast<MDString>(N->getOperand(0));
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata,
+ MaxTokens))
+ return *Token;
}
- uint64_t Hash = H % HalfTokens; // base hash
- if (containsPointer(N))
- Hash += HalfTokens;
- return Hash;
+ // Pick the fallback token (ClFallbackToken), which by default is 0, meaning
+ // it'll fall into the pointer-less bucket. Override by setting
+ // -alloc-token-fallback if that is the wrong choice.
+ remarkNoMetadata(CB, ORE);
+ return ClFallbackToken;
}
};
@@ -315,6 +305,9 @@ private:
FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
LibFunc OriginalFunc);
+ /// Lower alloc_token_* intrinsics.
+ void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE);
+
/// Return the token ID from metadata in the call.
uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
@@ -336,21 +329,32 @@ bool AllocToken::instrumentFunction(Function &F) {
// Do not apply any instrumentation for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
return false;
- if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
- return false;
// Don't touch available_externally functions, their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
return false;
- // Only instrument functions that have the sanitize_alloc_token attribute.
- if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
- return false;
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+ SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
+
+ // Only instrument functions that have the sanitize_alloc_token attribute.
+ const bool InstrumentFunction =
+ F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
+ !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
// Collect all allocation calls to avoid iterator invalidation.
for (Instruction &I : instructions(F)) {
+ // Collect all alloc_token_* intrinsics.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ IntrinsicInsts.emplace_back(II);
+ continue;
+ }
+
+ if (!InstrumentFunction)
+ continue;
+
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
@@ -359,11 +363,21 @@ bool AllocToken::instrumentFunction(Function &F) {
}
bool Modified = false;
- for (auto &[CB, Func] : AllocCalls)
- Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
- if (Modified)
- NumFunctionsInstrumented++;
+ if (!AllocCalls.empty()) {
+ for (auto &[CB, Func] : AllocCalls)
+ Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
+ if (Modified)
+ NumFunctionsModified++;
+ }
+
+ if (!IntrinsicInsts.empty()) {
+ for (auto *II : IntrinsicInsts)
+ replaceIntrinsicInst(II, ORE);
+ Modified = true;
+ NumFunctionsModified++;
+ }
+
return Modified;
}
@@ -381,7 +395,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB,
if (TLI.getLibFunc(*Callee, Func)) {
if (isInstrumentableLibFunc(Func, CB, TLI))
return Func;
- } else if (Options.Extended && getAllocTokenMetadata(CB)) {
+ } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) {
return NotLibFunc;
}
@@ -528,6 +542,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
return TokenAlloc;
}
+void AllocToken::replaceIntrinsicInst(IntrinsicInst *II,
+ OptimizationRemarkEmitter &ORE) {
+ assert(II->getIntrinsicID() == Intrinsic::alloc_token_id);
+
+ uint64_t TokenID = getToken(*II, ORE);
+ Value *V = ConstantInt::get(IntPtrTy, TokenID);
+ II->replaceAllUsesWith(V);
+ II->eraseFromParent();
+}
+
} // namespace
AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index d18c0d0..80e77e09 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -2020,7 +2020,6 @@ static void moveFastMathFlags(Function &F,
F.removeFnAttr(attr); \
FMF.set##setter(); \
}
- MOVE_FLAG("unsafe-fp-math", Fast)
MOVE_FLAG("no-infs-fp-math", NoInfs)
MOVE_FLAG("no-nans-fp-math", NoNaNs)
MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index febdc54..adf27be 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1011,6 +1011,10 @@ public:
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
bool canTruncateToMinimalBitwidth(Instruction *I, ElementCount VF) const {
+ // Truncs must truncate at most to their destination type.
+ if (isa_and_nonnull<TruncInst>(I) && MinBWs.contains(I) &&
+ I->getType()->getScalarSizeInBits() < MinBWs.lookup(I))
+ return false;
return VF.isVector() && MinBWs.contains(I) &&
!isProfitableToScalarize(I, VF) &&
!isScalarAfterVectorization(I, VF);
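
Concretely, the new guard covers cases like a trunc to i8 whose MinBWs entry says 16: honoring that entry would mean narrowing to a type wider than the trunc's own result, so such instructions are now reported as non-truncatable instead of being mis-narrowed.
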
@@ -7227,9 +7231,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
return DenseMap<const SCEV *, Value *>();
}
- VPlanTransforms::narrowInterleaveGroups(
- BestVPlan, BestVF,
- TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan);
@@ -8198,6 +8199,10 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (CM.foldTailWithEVL())
VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
*Plan, CM.getMaxSafeElements());
+
+ if (auto P = VPlanTransforms::narrowInterleaveGroups(*Plan, TTI))
+ VPlans.push_back(std::move(P));
+
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
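
Moving narrowInterleaveGroups out of executePlan also changes its character: instead of unconditionally rewriting the already-chosen plan at codegen time, it now runs during plan construction and, when it applies, contributes an additional candidate VPlan alongside the original, leaving the final decision to plan selection.
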
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3f18bd7..cdb9e7e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5577,62 +5577,79 @@ private:
}
// Decrement the unscheduled counter and insert to ready list if
// ready.
- auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE,
- unsigned OpIdx) {
- if (!ScheduleCopyableDataMap.empty()) {
- const EdgeInfo EI = {UserTE, OpIdx};
- if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
- DecrUnsched(CD, /*IsControl=*/false);
- return;
- }
- }
- auto It = OperandsUses.find(I);
- assert(It != OperandsUses.end() && "Operand not found");
- if (It->second > 0) {
- --It->getSecond();
- assert(TotalOpCount > 0 && "No more operands to decrement");
- --TotalOpCount;
- if (ScheduleData *OpSD = getScheduleData(I))
- DecrUnsched(OpSD, /*IsControl=*/false);
- }
- };
+ auto DecrUnschedForInst =
+ [&](Instruction *I, TreeEntry *UserTE, unsigned OpIdx,
+ SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>>
+ &Checked) {
+ if (!ScheduleCopyableDataMap.empty()) {
+ const EdgeInfo EI = {UserTE, OpIdx};
+ if (ScheduleCopyableData *CD =
+ getScheduleCopyableData(EI, I)) {
+ if (!Checked.insert(std::make_pair(CD, OpIdx)).second)
+ return;
+ DecrUnsched(CD, /*IsControl=*/false);
+ return;
+ }
+ }
+ auto It = OperandsUses.find(I);
+ assert(It != OperandsUses.end() && "Operand not found");
+ if (It->second > 0) {
+ --It->getSecond();
+ assert(TotalOpCount > 0 && "No more operands to decrement");
+ --TotalOpCount;
+ if (ScheduleData *OpSD = getScheduleData(I)) {
+ if (!Checked.insert(std::make_pair(OpSD, OpIdx)).second)
+ return;
+ DecrUnsched(OpSD, /*IsControl=*/false);
+ }
+ }
+ };
for (ScheduleBundle *Bundle : Bundles) {
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
break;
// Need to search for the lane since the tree entry can be
// reordered.
- int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(),
- find(Bundle->getTreeEntry()->Scalars, In));
- assert(Lane >= 0 && "Lane not set");
- if (isa<StoreInst>(In) &&
- !Bundle->getTreeEntry()->ReorderIndices.empty())
- Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
- assert(Lane < static_cast<int>(
- Bundle->getTreeEntry()->Scalars.size()) &&
- "Couldn't find extract lane");
-
- // Since vectorization tree is being built recursively this
- // assertion ensures that the tree entry has all operands set before
- // reaching this code. Couple of exceptions known at the moment are
- // extracts where their second (immediate) operand is not added.
- // Since immediates do not affect scheduler behavior this is
- // considered okay.
- assert(In &&
- (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
- In->getNumOperands() ==
- Bundle->getTreeEntry()->getNumOperands() ||
- Bundle->getTreeEntry()->isCopyableElement(In)) &&
- "Missed TreeEntry operands?");
-
- for (unsigned OpIdx :
- seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
- if (auto *I = dyn_cast<Instruction>(
- Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
- LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " << *I
- << "\n");
- DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx);
- }
+ auto *It = find(Bundle->getTreeEntry()->Scalars, In);
+ SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
+ do {
+ int Lane =
+ std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
+ assert(Lane >= 0 && "Lane not set");
+ if (isa<StoreInst>(In) &&
+ !Bundle->getTreeEntry()->ReorderIndices.empty())
+ Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
+ assert(Lane < static_cast<int>(
+ Bundle->getTreeEntry()->Scalars.size()) &&
+ "Couldn't find extract lane");
+
+              // Since the vectorization tree is built recursively, this
+              // assertion ensures that the tree entry has all operands set
+              // before reaching this code. A couple of exceptions known at
+              // the moment are extracts, where their second (immediate)
+              // operand is not added. Since immediates do not affect
+              // scheduler behavior, this is considered okay.
+ assert(In &&
+ (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
+ In->getNumOperands() ==
+ Bundle->getTreeEntry()->getNumOperands() ||
+ Bundle->getTreeEntry()->isCopyableElement(In)) &&
+ "Missed TreeEntry operands?");
+
+ for (unsigned OpIdx :
+ seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
+ if (auto *I = dyn_cast<Instruction>(
+ Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
+ LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): "
+ << *I << "\n");
+ DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, Checked);
+ }
+              // If the parent node is schedulable, it is handled correctly.
+ if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
+ break;
+ It = std::find(std::next(It),
+ Bundle->getTreeEntry()->Scalars.end(), In);
+ } while (It != Bundle->getTreeEntry()->Scalars.end());
}
} else {
// If BundleMember is a stand-alone instruction, no operand reordering
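
The Checked set threaded through DecrUnschedForInst above guards against decrementing the unscheduled count of the same (entity, operand-index) pair more than once, which the new do/while over every occurrence of In in the scalars list could otherwise do for copyable elements appearing in several lanes. A self-contained sketch of the dedup pattern, with toy types standing in for ScheduleEntity:

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

// Toy model: each (entity, operand-index) pair must be processed exactly
// once, even if the same value shows up in multiple lanes.
void decrOnce(int Entity, unsigned OpIdx,
              std::set<std::pair<int, unsigned>> &Checked) {
  if (!Checked.insert({Entity, OpIdx}).second)
    return; // already handled via another lane
  std::printf("decrement entity %d, op %u\n", Entity, OpIdx);
}

int main() {
  std::set<std::pair<int, unsigned>> Checked;
  std::vector<int> Lanes = {7, 7, 7}; // same entity in three lanes
  for (int E : Lanes)
    decrOnce(E, 0, Checked); // prints only once
  return 0;
}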
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index d167009..c95c887 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -217,32 +217,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
- if (!VPBB)
- return false;
-
- // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
- // VPBB as its entry, i.e., free of predecessors.
- if (auto *R = VPBB->getParent())
- return !R->isReplicator() && !VPBB->hasPredecessors();
-
- // A header dominates its second predecessor (the latch), with the other
- // predecessor being the preheader
- return VPB->getPredecessors().size() == 2 &&
- VPDT.dominates(VPB, VPB->getPredecessors()[1]);
-}
-
-bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- // A latch has a header as its second successor, with its other successor
- // leaving the loop. A preheader OTOH has a header as its first (and only)
- // successor.
- return VPB->getNumSuccessors() == 2 &&
- VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
-}
-
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
iterator It = begin();
while (It != end() && It->isPhi())
@@ -768,8 +742,12 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ VPlan &Plan = *getPlan();
+ VPRegionBlock *NewRegion =
+ isReplicator()
+ ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
+ : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
+
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
return NewRegion;
@@ -1213,6 +1191,7 @@ VPlan *VPlan::duplicate() {
}
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
Old2NewVPValues[&VF] = &NewPlan->VF;
+ Old2NewVPValues[&UF] = &NewPlan->UF;
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
if (BackedgeTakenCount) {
NewPlan->BackedgeTakenCount = new VPValue();
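
duplicate() has to seed the old-to-new value map with every plan-owned VPValue, now including the symbolic UF, so that operands of cloned recipes resolve to the clone's values instead of the original's. A toy illustration of the remap step, with hypothetical types (ToyPlan, Value):

#include <cassert>
#include <map>
#include <memory>
#include <vector>

struct Value {};

// Toy plan owning singleton values plus operands referencing them; cloning
// remaps each operand through an old->new table, as duplicate() does for
// VF, UF, VFxUF and friends.
struct ToyPlan {
  Value VF, UF, VFxUF;
  std::vector<Value *> Operands;

  std::unique_ptr<ToyPlan> duplicate() const {
    auto New = std::make_unique<ToyPlan>();
    std::map<const Value *, Value *> Old2New;
    Old2New[&VF] = &New->VF;
    Old2New[&UF] = &New->UF; // the mapping added by the hunk above
    Old2New[&VFxUF] = &New->VFxUF;
    for (Value *Op : Operands)
      New->Operands.push_back(Old2New.at(Op)); // throws if a value is unmapped
    return New;
  }
};

int main() {
  ToyPlan P;
  P.Operands.push_back(&P.UF);
  auto C = P.duplicate();
  assert(C->Operands[0] == &C->UF); // remapped, no aliasing of the original
  return 0;
}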
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fed04eb..167ba55 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4152,6 +4152,9 @@ class VPlan {
/// Represents the vectorization factor of the loop.
VPValue VF;
+ /// Represents the symbolic unroll factor of the loop.
+ VPValue UF;
+
/// Represents the loop-invariant VF * UF of the vector loop region.
VPValue VFxUF;
@@ -4305,6 +4308,9 @@ public:
VPValue &getVF() { return VF; };
const VPValue &getVF() const { return VF; };
+ /// Returns the symbolic UF of the vector loop region.
+ VPValue &getSymbolicUF() { return UF; };
+
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
@@ -4314,6 +4320,12 @@ public:
void addVF(ElementCount VF) { VFs.insert(VF); }
+ /// Remove \p VF from the plan.
+ void removeVF(ElementCount VF) {
+    assert(hasVF(VF) && "Tried to remove VF not present in plan");
+ VFs.remove(VF);
+ }
+
void setVF(ElementCount VF) {
assert(hasVF(VF) && "Cannot set VF not already in plan");
VFs.clear();
@@ -4438,22 +4450,24 @@ public:
return VPB;
}
- /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
- /// IsReplicator is true, the region is a replicate region. The returned block
- /// is owned by the VPlan and deleted once the VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "",
- bool IsReplicator = false) {
- auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
+  /// Create a new loop region with \p Name, with its entry and exiting blocks
+  /// set to \p Entry and \p Exiting respectively, if provided. The returned
+  /// block is owned by the VPlan and deleted once the VPlan is destroyed.
+ VPRegionBlock *createLoopRegion(const std::string &Name = "",
+ VPBlockBase *Entry = nullptr,
+ VPBlockBase *Exiting = nullptr) {
+ auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
+ : new VPRegionBlock(Name);
CreatedBlocks.push_back(VPB);
return VPB;
}
- /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set
- /// to nullptr. The returned block is owned by the VPlan and deleted once the
- /// VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(const std::string &Name = "") {
- auto *VPB = new VPRegionBlock(Name);
+ /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
+ /// returned block is owned by the VPlan and deleted once the VPlan is
+ /// destroyed.
+ VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const std::string &Name = "") {
+ auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
CreatedBlocks.push_back(VPB);
return VPB;
}
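
Splitting createVPRegionBlock into two entry points moves the replicator flag out of a boolean parameter and into the function name; the call-site updates in the following files read accordingly. A short illustrative fragment, assuming a VPlan &Plan and blocks Entry and Exiting are in scope:

// Before this patch, a trailing bool picked the region kind:
//   auto *R = Plan.createVPRegionBlock(Entry, Exiting, "pred.store", true);
// After, the kind is explicit at the call site:
auto *Replicate = Plan.createReplicateRegion(Entry, Exiting, "pred.store");
auto *Loop = Plan.createLoopRegion(); // entry/exiting attached later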
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 332791a..65688a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -406,7 +406,7 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
// LatchExitVPB, taking care to preserve the original predecessor & successor
// order of blocks. Set region entry and exiting after both HeaderVPB and
// LatchVPBB have been disconnected from their predecessors/successors.
- auto *R = Plan.createVPRegionBlock();
+ auto *R = Plan.createLoopRegion();
VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
VPBlockUtils::disconnectBlocks(LatchVPBB, R);
VPBlockUtils::connectBlocks(PreheaderVPBB, R);
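
The ordering above matters: the region is spliced onto the latch-to-exit edge first, then reconnected, so each block's predecessor and successor order is preserved. A toy sketch of that edge-splice idea, with hypothetical types (Block, insertOnEdge) mirroring what VPBlockUtils::insertOnEdge does for the new loop region:

#include <algorithm>
#include <cassert>
#include <vector>

// Toy CFG edge splice: replace the Latch->Exit edge with Latch->R->Exit
// while keeping each block's successor/predecessor order stable.
struct Block {
  std::vector<Block *> Succs, Preds;
};

void insertOnEdge(Block *From, Block *To, Block *New) {
  std::replace(From->Succs.begin(), From->Succs.end(), To, New);
  std::replace(To->Preds.begin(), To->Preds.end(), From, New);
  New->Preds.push_back(From);
  New->Succs.push_back(To);
}

int main() {
  Block Latch, Exit, R;
  Latch.Succs = {&Exit};
  Exit.Preds = {&Latch};
  insertOnEdge(&Latch, &Exit, &R);
  assert(Latch.Succs[0] == &R && Exit.Preds[0] == &R);
  return 0;
}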
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e060e70..ff25ef5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
auto *Exiting =
Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
VPRegionBlock *Region =
- Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);
+ Plan.createReplicateRegion(Entry, Exiting, RegionName);
// Note: first set Entry as region entry and then connect successors starting
// from it in order, to propagate the "parent" of each VPBasicBlock.
@@ -1478,11 +1478,8 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
if (!Plan.getVectorLoopRegion())
return false;
- if (!Plan.getTripCount()->isLiveIn())
- return false;
- auto *TC = dyn_cast_if_present<ConstantInt>(
- Plan.getTripCount()->getUnderlyingValue());
- if (!TC || !BestVF.isFixed())
+ const APInt *TC;
+ if (!BestVF.isFixed() || !match(Plan.getTripCount(), m_APInt(TC)))
return false;
// Calculate the minimum power-of-2 bit width that can fit the known TC, VF
@@ -1495,7 +1492,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
return std::max<unsigned>(PowerOf2Ceil(MaxVal.getActiveBits()), 8);
};
unsigned NewBitWidth =
- ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF);
+ ComputeBitWidth(*TC, BestVF.getKnownMinValue() * BestUF);
LLVMContext &Ctx = Plan.getContext();
auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
@@ -2092,8 +2089,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
// Recipes in replicate regions implicitly depend on predicate. If either
// recipe is in a replicate region, only consider them equal if both have
// the same parent.
- const VPRegionBlock *RegionL = L->getParent()->getParent();
- const VPRegionBlock *RegionR = R->getParent()->getParent();
+ const VPRegionBlock *RegionL = L->getRegion();
+ const VPRegionBlock *RegionR = R->getRegion();
if (((RegionL && RegionL->isReplicator()) ||
(RegionR && RegionR->isReplicator())) &&
L->getParent() != R->getParent())
@@ -3867,8 +3864,7 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
// required lanes implicitly.
// TODO: Remove once replicate regions are unrolled completely.
auto IsCandidateUnpackUser = [Def](VPUser *U) {
- VPRegionBlock *ParentRegion =
- cast<VPRecipeBase>(U)->getParent()->getParent();
+ VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
return U->usesScalars(Def) &&
(!ParentRegion || !ParentRegion->isReplicator());
};
@@ -3960,6 +3956,9 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
// used.
// TODO: Assert that they aren't used.
+ VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
+ Plan.getSymbolicUF().replaceAllUsesWith(UF);
+
// If there are no users of the runtime VF, compute VFxUF by constant folding
// the multiplication of VF and UF.
if (VF.getNumUsers() == 0) {
@@ -3979,7 +3978,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
}
VF.replaceAllUsesWith(RuntimeVF);
- VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
VFxUF.replaceAllUsesWith(MulByUF);
}
@@ -4047,14 +4045,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
return false;
}
-/// Returns true if \p IR is a full interleave group with factor and number of
-/// members both equal to \p VF. The interleave group must also access the full
-/// vector width \p VectorRegWidth.
-static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
- unsigned VF, VPTypeAnalysis &TypeInfo,
- unsigned VectorRegWidth) {
+/// Returns a VF from \p VFs if \p InterleaveR is a full interleave group with
+/// factor and number of members both equal to that VF. The interleave group
+/// must also access the full vector width for that VF.
+static std::optional<ElementCount> isConsecutiveInterleaveGroup(
+ VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs,
+ VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) {
if (!InterleaveR)
- return false;
+ return std::nullopt;
Type *GroupElementTy = nullptr;
if (InterleaveR->getStoredValues().empty()) {
@@ -4063,7 +4061,7 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
}))
- return false;
+ return std::nullopt;
} else {
GroupElementTy =
TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]);
@@ -4071,13 +4069,27 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
}))
- return false;
+ return std::nullopt;
}
- unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF;
- auto IG = InterleaveR->getInterleaveGroup();
- return IG->getFactor() == VF && IG->getNumMembers() == VF &&
- GroupSize == VectorRegWidth;
+ auto GetVectorWidthForVF = [&TTI](ElementCount VF) {
+ TypeSize Size = TTI.getRegisterBitWidth(
+ VF.isFixed() ? TargetTransformInfo::RGK_FixedWidthVector
+ : TargetTransformInfo::RGK_ScalableVector);
+ assert(Size.isScalable() == VF.isScalable() &&
+           "if Size is scalable, VF must be too and vice versa");
+ return Size.getKnownMinValue();
+ };
+
+ for (ElementCount VF : VFs) {
+ unsigned MinVal = VF.getKnownMinValue();
+ unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * MinVal;
+ auto IG = InterleaveR->getInterleaveGroup();
+ if (IG->getFactor() == MinVal && IG->getNumMembers() == MinVal &&
+ GroupSize == GetVectorWidthForVF(VF))
+ return {VF};
+ }
+ return std::nullopt;
}
/// Returns true if \p VPValue is a narrow VPValue.
@@ -4088,16 +4100,18 @@ static bool isAlreadyNarrow(VPValue *VPV) {
return RepR && RepR->isSingleScalar();
}
-void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
- unsigned VectorRegWidth) {
+std::unique_ptr<VPlan>
+VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
+ const TargetTransformInfo &TTI) {
+ using namespace llvm::VPlanPatternMatch;
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
+
if (!VectorLoop)
- return;
+ return nullptr;
VPTypeAnalysis TypeInfo(Plan);
-
- unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
+ std::optional<ElementCount> VFToOptimize;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
continue;
@@ -4111,30 +4125,33 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// * recipes writing to memory except interleave groups
// Only support plans with a canonical induction phi.
if (R.isPhi())
- return;
+ return nullptr;
auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
if (R.mayWriteToMemory() && !InterleaveR)
- return;
-
- // Do not narrow interleave groups if there are VectorPointer recipes and
- // the plan was unrolled. The recipe implicitly uses VF from
- // VPTransformState.
- // TODO: Remove restriction once the VF for the VectorPointer offset is
- // modeled explicitly as operand.
- if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
- return;
+ return nullptr;
// All other ops are allowed, but we reject uses that cannot be converted
// when checking all allowed consumers (store interleave groups) below.
if (!InterleaveR)
continue;
- // Bail out on non-consecutive interleave groups.
- if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
- VectorRegWidth))
- return;
-
+    // Try to find a single VF for which all interleave groups are
+    // consecutive and saturate the full vector width. If we already have a
+    // candidate VF, check whether it also applies to the current InterleaveR;
+    // otherwise look for a suitable VF across the Plan's VFs.
+ if (VFToOptimize) {
+ if (!isConsecutiveInterleaveGroup(InterleaveR, {*VFToOptimize}, TypeInfo,
+ TTI))
+ return nullptr;
+ } else {
+ if (auto VF = isConsecutiveInterleaveGroup(
+ InterleaveR, to_vector(Plan.vectorFactors()), TypeInfo, TTI))
+ VFToOptimize = *VF;
+ else
+ return nullptr;
+ }
// Skip read interleave groups.
if (InterleaveR->getStoredValues().empty())
continue;
@@ -4168,24 +4185,34 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
InterleaveR->getStoredValues()[0]->getDefiningRecipe());
if (!WideMember0)
- return;
+ return nullptr;
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
R->getNumOperands() > 2)
- return;
+ return nullptr;
if (any_of(enumerate(R->operands()),
[WideMember0, Idx = I](const auto &P) {
const auto &[OpIdx, OpV] = P;
return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
}))
- return;
+ return nullptr;
}
StoreGroups.push_back(InterleaveR);
}
if (StoreGroups.empty())
- return;
+ return nullptr;
+
+  // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
+  // original Plan in two: a) a new clone which contains all VFs of Plan except
+  // VFToOptimize, and b) the original Plan with VFToOptimize as its single VF.
+ std::unique_ptr<VPlan> NewPlan;
+ if (size(Plan.vectorFactors()) != 1) {
+ NewPlan = std::unique_ptr<VPlan>(Plan.duplicate());
+ Plan.setVF(*VFToOptimize);
+ NewPlan->removeVF(*VFToOptimize);
+ }
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
SmallPtrSet<VPValue *, 4> NarrowedOps;
@@ -4256,9 +4283,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
VPBuilder PHBuilder(Plan.getVectorPreheader());
- VPValue *UF = Plan.getOrAddLiveIn(
- ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
- if (VF.isScalable()) {
+ VPValue *UF = &Plan.getSymbolicUF();
+ if (VFToOptimize->isScalable()) {
VPValue *VScale = PHBuilder.createElementCount(
CanIV->getScalarType(), ElementCount::getScalable(1));
VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
@@ -4270,6 +4296,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
}
removeDeadRecipes(Plan);
+ assert(none_of(*VectorLoop->getEntryBasicBlock(),
+ IsaPred<VPVectorPointerRecipe>) &&
+ "All VPVectorPointerRecipes should have been removed");
+ return NewPlan;
}
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
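
The increment of the narrowed induction now references the plan's symbolic UF, which materializeVFAndVFxUF later replaces with a constant once the final unroll factor is fixed. A self-contained sketch of this late-materialization pattern, with a toy Placeholder standing in for the symbolic VPValue:

#include <cassert>
#include <vector>

// Toy "symbolic value": recipes reference a placeholder first; a later pass
// replaces all uses with a concrete constant, as materializeVFAndVFxUF now
// does for the symbolic UF.
struct Placeholder {
  std::vector<int *> Uses; // slots waiting for the eventual value
  void replaceAllUsesWith(int V) {
    for (int *U : Uses)
      *U = V;
  }
};

int main() {
  Placeholder UF;
  int Increment = -1; // built before the final unroll factor is known
  UF.Uses.push_back(&Increment);
  // ... transforms run, possibly changing the plan's unroll factor ...
  UF.replaceAllUsesWith(2); // materialize with the final constant
  assert(Increment == 2);
  return 0;
}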
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index b28559b..ca8d956 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -341,14 +341,20 @@ struct VPlanTransforms {
static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
ScalarEvolution &SE);
- /// Try to convert a plan with interleave groups with VF elements to a plan
- /// with the interleave groups replaced by wide loads and stores processing VF
- /// elements, if all transformed interleave groups access the full vector
- /// width (checked via \o VectorRegWidth). This effectively is a very simple
- /// form of loop-aware SLP, where we use interleave groups to identify
- /// candidates.
- static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
- unsigned VectorRegWidth);
+  /// Try to find a single VF among \p Plan's VFs for which all interleave
+  /// groups (with known minimum VF elements) can be replaced by wide loads
+  /// and stores processing VF elements, provided all transformed interleave
+  /// groups access the full vector width (checked via the maximum vector
+  /// register width). If the transformation can be applied, the original
+  /// \p Plan is split in two:
+  /// 1. The original Plan with the single optimized VF, containing the
+  ///    optimized recipes using wide loads instead of interleave groups.
+  /// 2. A new clone which contains all VFs of Plan except the optimized VF.
+ ///
+ /// This effectively is a very simple form of loop-aware SLP, where we use
+ /// interleave groups to identify candidates.
+ static std::unique_ptr<VPlan>
+ narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
/// Predicate and linearize the control-flow in the only loop region of
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 10801c0..32e4b88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -8,6 +8,7 @@
#include "VPlanUtils.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -253,3 +254,29 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
return UncountableCondition;
}
+
+bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
+ const VPDominatorTree &VPDT) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+ if (!VPBB)
+ return false;
+
+ // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
+ // VPBB as its entry, i.e., free of predecessors.
+ if (auto *R = VPBB->getParent())
+ return !R->isReplicator() && !VPBB->hasPredecessors();
+
+  // A header dominates its second predecessor (the latch), with the other
+  // predecessor being the preheader.
+ return VPB->getPredecessors().size() == 2 &&
+ VPDT.dominates(VPB, VPB->getPredecessors()[1]);
+}
+
+bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
+ const VPDominatorTree &VPDT) {
+  // A latch has a header as its second successor, with its other successor
+  // leaving the loop. A preheader, on the other hand, has a header as its
+  // first (and only) successor.
+ return VPB->getNumSuccessors() == 2 &&
+ VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
+}