Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/LazyValueInfo.cpp | 10
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 53
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9
-rw-r--r--  llvm/lib/Passes/PassBuilder.cpp | 33
-rw-r--r--  llvm/lib/Passes/PassBuilderPipelines.cpp | 34
-rw-r--r--  llvm/lib/Support/APFloat.cpp | 538
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNRegPressure.h | 9
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 61
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 47
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 22
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 675
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 29
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 14
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp | 22
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 42
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopPassManager.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 42
-rw-r--r--  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Utils/SCCPSolver.cpp | 186
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 107
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h | 2
32 files changed, 1530 insertions, 520 deletions
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 0e5bc48..df75999 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -947,9 +947,8 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
/*UseBlockValue*/ false));
}
- ValueLatticeElement Result = TrueVal;
- Result.mergeIn(FalseVal);
- return Result;
+ TrueVal.mergeIn(FalseVal);
+ return TrueVal;
}
std::optional<ConstantRange>
@@ -1778,9 +1777,8 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
assert(OptResult && "Value not available after solving");
}
- ValueLatticeElement Result = *OptResult;
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
- return Result;
+ LLVM_DEBUG(dbgs() << " Result = " << *OptResult << "\n");
+ return *OptResult;
}
ValueLatticeElement LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e06b095..425420f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15740,19 +15740,26 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
GetNextSCEVDividesByDivisor(One, DividesBy);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
} else {
+ // LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
+ // but creating the subtraction eagerly is expensive. Track the
+ // inequalities in a separate map, and materialize the rewrite lazily
+ // when encountering a suitable subtraction while re-writing.
if (LHS->getType()->isPointerTy()) {
LHS = SE.getLosslessPtrToIntExpr(LHS);
RHS = SE.getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
break;
}
- auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
- const SCEV *Sub = SE.getMinusSCEV(A, B);
- AddRewrite(Sub, Sub,
- SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
- };
- AddSubRewrite(LHS, RHS);
- AddSubRewrite(RHS, LHS);
+ const SCEVConstant *C;
+ const SCEV *A, *B;
+ if (match(RHS, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
+ match(LHS, m_scev_Add(m_scev_Specific(C), m_SCEV(B)))) {
+ RHS = A;
+ LHS = B;
+ }
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ Guards.NotEqual.insert({LHS, RHS});
continue;
}
break;
@@ -15886,13 +15893,15 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
class SCEVLoopGuardRewriter
: public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
const DenseMap<const SCEV *, const SCEV *> &Map;
+ const SmallDenseSet<std::pair<const SCEV *, const SCEV *>> &NotEqual;
SCEV::NoWrapFlags FlagMask = SCEV::FlagAnyWrap;
public:
SCEVLoopGuardRewriter(ScalarEvolution &SE,
const ScalarEvolution::LoopGuards &Guards)
- : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap) {
+ : SCEVRewriteVisitor(SE), Map(Guards.RewriteMap),
+ NotEqual(Guards.NotEqual) {
if (Guards.PreserveNUW)
FlagMask = ScalarEvolution::setFlags(FlagMask, SCEV::FlagNUW);
if (Guards.PreserveNSW)
@@ -15947,14 +15956,36 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ // Helper to check if S is a subtraction (A - B) where A != B, and if so,
+ // return UMax(S, 1).
+ auto RewriteSubtraction = [&](const SCEV *S) -> const SCEV * {
+ const SCEV *LHS, *RHS;
+ if (MatchBinarySub(S, LHS, RHS)) {
+ if (LHS > RHS)
+ std::swap(LHS, RHS);
+ if (NotEqual.contains({LHS, RHS}))
+ return SE.getUMaxExpr(S, SE.getOne(S->getType()));
+ }
+ return nullptr;
+ };
+
+ // Check if Expr itself is a subtraction pattern with guard info.
+ if (const SCEV *Rewritten = RewriteSubtraction(Expr))
+ return Rewritten;
+
// Trip count expressions sometimes consist of adding 3 operands, i.e.
// (Const + A + B). There may be guard info for A + B, and if so, apply
// it.
// TODO: Could more generally apply guards to Add sub-expressions.
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
Expr->getNumOperands() == 3) {
- if (const SCEV *S = Map.lookup(
- SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
+ const SCEV *Add =
+ SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2));
+ if (const SCEV *Rewritten = RewriteSubtraction(Add))
+ return SE.getAddExpr(
+ Expr->getOperand(0), Rewritten,
+ ScalarEvolution::maskFlags(Expr->getNoWrapFlags(), FlagMask));
+ if (const SCEV *S = Map.lookup(Add))
return SE.getAddExpr(Expr->getOperand(0), S);
}
SmallVector<const SCEV *, 2> Operands;
@@ -15989,7 +16020,7 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
}
};
- if (RewriteMap.empty())
+ if (RewriteMap.empty() && NotEqual.empty())
return Expr;
SCEVLoopGuardRewriter Rewriter(SE, *this);
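Read in isolation, the NotEqual bookkeeping in the ScalarEvolution hunks above boils down to: record each known "LHS != RHS" fact as a canonically ordered pair instead of eagerly building LHS - RHS, and only clamp a subtraction to UMax(A - B, 1) when its operands show up in that set during rewriting. A minimal standalone sketch of that shape (plain std:: types and hypothetical names, not the real SCEV classes):

    #include <algorithm>
    #include <set>
    #include <utility>

    struct Expr {};                                 // stand-in for const SCEV *
    using ExprPair = std::pair<const Expr *, const Expr *>;

    struct NotEqualGuards {
      std::set<ExprPair> Pairs;

      // Mirror the "if (LHS > RHS) std::swap(LHS, RHS)" canonicalization.
      static ExprPair canonical(const Expr *A, const Expr *B) {
        if (A > B)
          std::swap(A, B);
        return {A, B};
      }

      // collectFromBlock side: record "A != B" without building A - B yet.
      void recordNotEqual(const Expr *A, const Expr *B) {
        Pairs.insert(canonical(A, B));
      }

      // rewrite side: a subtraction A - B may be clamped to UMax(A - B, 1)
      // only when the pair is known to be unequal.
      bool knownNonZero(const Expr *A, const Expr *B) const {
        return Pairs.count(canonical(A, B)) != 0;
      }
    };

The deleted AddSubRewrite lambda paid for SE.getMinusSCEV in both operand orders up front; the deferred form only spends the UMax construction when a matching subtraction is actually encountered while rewriting.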
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 72582d7..567acf7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1039,12 +1039,17 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
} else
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
- if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName())
+ if (auto Lang = DIUnit->getSourceLanguage(); Lang.hasVersionedName()) {
NewCU.addUInt(Die, dwarf::DW_AT_language_name, dwarf::DW_FORM_data2,
Lang.getName());
- else
+
+ if (uint32_t LangVersion = Lang.getVersion(); LangVersion != 0)
+ NewCU.addUInt(Die, dwarf::DW_AT_language_version, /*Form=*/std::nullopt,
+ LangVersion);
+ } else {
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
Lang.getName());
+ }
NewCU.addString(Die, dwarf::DW_AT_name, FN);
StringRef SysRoot = DIUnit->getSysRoot();
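Stripped of the DwarfDebug plumbing, the attribute selection above is a three-way choice; a standalone sketch with plain types and a hypothetical AddUInt callback (attribute names shown as strings purely for illustration):

    struct LangInfo {
      bool HasVersionedName; // source language carries a versioned name
      unsigned NameCode;     // value emitted for DW_AT_language_name / DW_AT_language
      unsigned Version;      // revision from Lang.getVersion(); 0 if unknown
    };

    void emitLanguageAttrs(const LangInfo &Lang,
                           void (*AddUInt)(const char *Attr, unsigned Value)) {
      if (Lang.HasVersionedName) {
        AddUInt("DW_AT_language_name", Lang.NameCode);
        if (Lang.Version != 0)                      // version attribute only when known
          AddUInt("DW_AT_language_version", Lang.Version);
      } else {
        AddUInt("DW_AT_language", Lang.NameCode);   // classic single-attribute fallback
      }
    }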
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 53cf004..e45cac8 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -2027,13 +2027,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -2041,9 +2041,8 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
- MPM.addPass( \
- createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
- CREATE_PASS(Params.get()), false, false))); \
+ MPM.addPass(createModuleToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \
return Error::success(); \
}
#include "PassRegistry.def"
@@ -2142,13 +2141,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
- createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -2156,9 +2155,8 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
- CGPM.addPass( \
- createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
- CREATE_PASS(Params.get()), false, false))); \
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false))); \
return Error::success(); \
}
#include "PassRegistry.def"
@@ -2191,11 +2189,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
return Err;
// Add the nested pass manager with the appropriate adaptor.
bool UseMemorySSA = (Name == "loop-mssa");
- bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
- return Pipeline.Name.contains("simple-loop-unswitch");
- });
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
- UseBFI));
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA));
return Error::success();
}
if (Name == "machine-function") {
@@ -2248,12 +2243,12 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
// The risk is that it may become obsolete if we're not careful.
#define LOOPNEST_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
- FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
- FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false)); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -2261,8 +2256,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
- FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \
- false, false)); \
+ FPM.addPass( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), false)); \
return Error::success(); \
}
#include "PassRegistry.def"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 3f3939eaf..bd03ac0 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -519,16 +519,14 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
+ /*UseMemorySSA=*/true));
FPM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -710,8 +708,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
+ /*UseMemorySSA=*/true));
FPM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
@@ -719,8 +716,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -773,7 +769,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/true));
FPM.addPass(CoroElidePass());
@@ -842,8 +838,7 @@ void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
createFunctionToLoopPassAdaptor(
LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false),
+ /*UseMemorySSA=*/false),
PTO.EagerlyInvalidateAnalyses));
}
}
@@ -1358,8 +1353,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
ExtraPasses.addPass(
- createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
ExtraPasses.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
ExtraPasses.addPass(InstCombinePass());
@@ -1438,7 +1432,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*UseMemorySSA=*/true));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1520,7 +1514,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*USeMemorySSA=*/true));
}
OptimizePM.addPass(Float2IntPass());
@@ -1560,8 +1554,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (PTO.LoopInterchange)
LPM.addPass(LoopInterchangePass());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
// FIXME: This may not be the right place in the pipeline.
// We need to have the data to support the right place.
@@ -2133,7 +2127,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MainFPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+ /*USeMemorySSA=*/true));
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
@@ -2163,8 +2157,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
PTO.ForgetAllSCEVInLoopUnroll));
// The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
+ MainFPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
MainFPM.addPass(LoopDistributePass());
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 8623c06..b4de79a 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -130,44 +130,46 @@ struct fltSemantics {
bool hasSignBitInMSB = true;
};
-static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
-static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
-static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
-static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
-static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
-static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
-static constexpr fltSemantics semFloat8E5M2FNUZ = {
+constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
+constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
+constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
+constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
+constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
+constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
+constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
-static constexpr fltSemantics semFloat8E4M3FN = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
+constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
-static constexpr fltSemantics semFloat8E4M3FNUZ = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
+constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
-static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
-static constexpr fltSemantics semFloat8E8M0FNU = {127,
- -127,
- 1,
- 8,
- fltNonfiniteBehavior::NanOnly,
- fltNanEncoding::AllOnes,
- false,
- false,
- false};
-
-static constexpr fltSemantics semFloat6E3M2FN = {
+constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
+constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
+constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
+ 127,
+ -127,
+ 1,
+ 8,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::AllOnes,
+ false,
+ false,
+ false};
+
+constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semFloat6E2M3FN = {
+constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semFloat4E2M1FN = {
+constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
-static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
-static constexpr fltSemantics semBogus = {0, 0, 0, 0};
-static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
-static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
- 53 + 53, 128};
+constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
+ 80};
+constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
+constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
+constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
+ 1023, -1022 + 53, 53 + 53, 128};
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
switch (S) {
@@ -261,36 +263,6 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
llvm_unreachable("Unknown floating semantics");
}
-const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
-const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
-const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
-const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
-const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
-const fltSemantics &APFloatBase::PPCDoubleDouble() {
- return semPPCDoubleDouble;
-}
-const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() {
- return semPPCDoubleDoubleLegacy;
-}
-const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
-const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
-const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
-const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
-const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
-const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
- return semFloat8E4M3B11FNUZ;
-}
-const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
-const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
-const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
-const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
-const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
-const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
-const fltSemantics &APFloatBase::x87DoubleExtended() {
- return semX87DoubleExtended;
-}
-const fltSemantics &APFloatBase::Bogus() { return semBogus; }
-
bool APFloatBase::isRepresentableBy(const fltSemantics &A,
const fltSemantics &B) {
return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
@@ -1029,7 +1001,7 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
// For x87 extended precision, we want to make a NaN, not a
// pseudo-NaN. Maybe we should expose the ability to make
// pseudo-NaNs?
- if (semantics == &semX87DoubleExtended)
+ if (semantics == &APFloatBase::semX87DoubleExtended)
APInt::tcSetBit(significand, QNaNBit + 1);
}
@@ -1054,7 +1026,7 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
category = rhs.category;
sign = rhs.sign;
- rhs.semantics = &semBogus;
+ rhs.semantics = &APFloatBase::semBogus;
return *this;
}
@@ -1247,7 +1219,7 @@ IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
assign(rhs);
}
-IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
+IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
*this = std::move(rhs);
}
@@ -2607,8 +2579,8 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
shift = toSemantics.precision - fromSemantics.precision;
bool X86SpecialNan = false;
- if (&fromSemantics == &semX87DoubleExtended &&
- &toSemantics != &semX87DoubleExtended && category == fcNaN &&
+ if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
+ &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
(!(*significandParts() & 0x8000000000000000ULL) ||
!(*significandParts() & 0x4000000000000000ULL))) {
// x86 has some unusual NaNs which cannot be represented in any other
@@ -2694,7 +2666,7 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
// For x87 extended precision, we want to make a NaN, not a special NaN if
// the input wasn't special either.
- if (!X86SpecialNan && semantics == &semX87DoubleExtended)
+ if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
APInt::tcSetBit(significandParts(), semantics->precision - 1);
// Convert of sNaN creates qNaN and raises an exception (invalid op).
@@ -3530,7 +3502,8 @@ hash_code hash_value(const IEEEFloat &Arg) {
// the actual IEEE representations. We compensate for that here.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
assert(partCount()==2);
uint64_t myexponent, mysignificand;
@@ -3560,7 +3533,8 @@ APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
}
APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
assert(partCount()==2);
uint64_t words[2];
@@ -3574,14 +3548,14 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
// Declare fltSemantics before APFloat that uses it (and
// saves pointer to it) to ensure correct destruction order.
fltSemantics extendedSemantics = *semantics;
- extendedSemantics.minExponent = semIEEEdouble.minExponent;
+ extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
IEEEFloat extended(*this);
fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
IEEEFloat u(extended);
- fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK || fs == opInexact);
(void)fs;
words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
@@ -3597,7 +3571,7 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
IEEEFloat v(extended);
v.subtract(u, rmNearestTiesToEven);
- fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
@@ -3611,8 +3585,9 @@ APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
assert(semantics == &S);
- const int bias =
- (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
+ const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
+ ? -S.minExponent
+ : -(S.minExponent - 1);
constexpr unsigned int trailing_significand_bits = S.precision - 1;
constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
constexpr integerPart integer_bit =
@@ -3677,87 +3652,87 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const {
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
assert(partCount() == 2);
- return convertIEEEFloatToAPInt<semIEEEquad>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
}
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEdouble>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
}
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEsingle>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
}
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semBFloat>();
+ return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
}
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
assert(partCount()==1);
- return convertIEEEFloatToAPInt<semIEEEhalf>();
+ return convertIEEEFloatToAPInt<APFloatBase::semIEEEhalf>();
}
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E5M2>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
}
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
}
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
}
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
}
APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E3M4>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
}
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloatTF32>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
}
APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
}
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
}
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
}
APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
assert(partCount() == 1);
- return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
+ return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
}
// This function creates an APInt that is just a bit map of the floating
@@ -3765,74 +3740,77 @@ APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
// and treating the result as a normal integer is unlikely to be useful.
APInt IEEEFloat::bitcastToAPInt() const {
- if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
return convertHalfAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semBFloat)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
return convertBFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
return convertFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
return convertDoubleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
return convertQuadrupleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
+ if (semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
return convertFloat8E5M2FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
return convertFloat8E4M3APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
return convertFloat8E4M3FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
+ if (semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
return convertFloat8E4M3B11FNUZAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
return convertFloat8E3M4APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
return convertFloatTF32APFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
return convertFloat8E8M0FNUAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
return convertFloat6E3M2FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
return convertFloat6E2M3FNAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
+ if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
return convertFloat4E2M1FNAPFloatToAPInt();
- assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
+ assert(semantics ==
+ (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
}
float IEEEFloat::convertToFloat() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
"Float semantics are not IEEEsingle");
APInt api = bitcastToAPInt();
return api.bitsToFloat();
}
double IEEEFloat::convertToDouble() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
"Float semantics are not IEEEdouble");
APInt api = bitcastToAPInt();
return api.bitsToDouble();
@@ -3840,7 +3818,7 @@ double IEEEFloat::convertToDouble() const {
#ifdef HAS_IEE754_FLOAT128
float128 IEEEFloat::convertToQuad() const {
- assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
+ assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
"Float semantics are not IEEEquads");
APInt api = bitcastToAPInt();
return api.bitsToQuad();
@@ -3861,7 +3839,7 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
uint64_t mysignificand = i1;
uint8_t myintegerbit = mysignificand >> 63;
- initialize(&semX87DoubleExtended);
+ initialize(&APFloatBase::semX87DoubleExtended);
assert(partCount()==2);
sign = static_cast<unsigned int>(i2>>15);
@@ -3893,14 +3871,16 @@ void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
// Get the first double and convert to our format.
initFromDoubleAPInt(APInt(64, i1));
- fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
+ fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
+ &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
// Unless we have a special case, add in second double.
if (isFiniteNonZero()) {
- IEEEFloat v(semIEEEdouble, APInt(64, i2));
- fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
+ IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
+ fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
+ &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3918,7 +3898,7 @@ void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
uint64_t val = api.getRawData()[0];
uint64_t myexponent = (val & exponent_mask);
- initialize(&semFloat8E8M0FNU);
+ initialize(&APFloatBase::semFloat8E8M0FNU);
assert(partCount() == 1);
// This format has unsigned representation only
@@ -4025,109 +4005,109 @@ void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
}
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEquad>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
}
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEdouble>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
}
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEsingle>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
}
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- initFromIEEEAPInt<semBFloat>(api);
+ initFromIEEEAPInt<APFloatBase::semBFloat>(api);
}
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- initFromIEEEAPInt<semIEEEhalf>(api);
+ initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
}
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E5M2>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
}
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
}
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
}
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
}
void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
- initFromIEEEAPInt<semFloat8E3M4>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
}
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
- initFromIEEEAPInt<semFloatTF32>(api);
+ initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
}
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat6E3M2FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
}
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat6E2M3FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
}
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
- initFromIEEEAPInt<semFloat4E2M1FN>(api);
+ initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
}
/// Treat api as containing the bits of a floating point number.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
assert(api.getBitWidth() == Sem->sizeInBits);
- if (Sem == &semIEEEhalf)
+ if (Sem == &APFloatBase::semIEEEhalf)
return initFromHalfAPInt(api);
- if (Sem == &semBFloat)
+ if (Sem == &APFloatBase::semBFloat)
return initFromBFloatAPInt(api);
- if (Sem == &semIEEEsingle)
+ if (Sem == &APFloatBase::semIEEEsingle)
return initFromFloatAPInt(api);
- if (Sem == &semIEEEdouble)
+ if (Sem == &APFloatBase::semIEEEdouble)
return initFromDoubleAPInt(api);
- if (Sem == &semX87DoubleExtended)
+ if (Sem == &APFloatBase::semX87DoubleExtended)
return initFromF80LongDoubleAPInt(api);
- if (Sem == &semIEEEquad)
+ if (Sem == &APFloatBase::semIEEEquad)
return initFromQuadrupleAPInt(api);
- if (Sem == &semPPCDoubleDoubleLegacy)
+ if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
return initFromPPCDoubleDoubleLegacyAPInt(api);
- if (Sem == &semFloat8E5M2)
+ if (Sem == &APFloatBase::semFloat8E5M2)
return initFromFloat8E5M2APInt(api);
- if (Sem == &semFloat8E5M2FNUZ)
+ if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
return initFromFloat8E5M2FNUZAPInt(api);
- if (Sem == &semFloat8E4M3)
+ if (Sem == &APFloatBase::semFloat8E4M3)
return initFromFloat8E4M3APInt(api);
- if (Sem == &semFloat8E4M3FN)
+ if (Sem == &APFloatBase::semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);
- if (Sem == &semFloat8E4M3FNUZ)
+ if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
return initFromFloat8E4M3FNUZAPInt(api);
- if (Sem == &semFloat8E4M3B11FNUZ)
+ if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
return initFromFloat8E4M3B11FNUZAPInt(api);
- if (Sem == &semFloat8E3M4)
+ if (Sem == &APFloatBase::semFloat8E3M4)
return initFromFloat8E3M4APInt(api);
- if (Sem == &semFloatTF32)
+ if (Sem == &APFloatBase::semFloatTF32)
return initFromFloatTF32APInt(api);
- if (Sem == &semFloat8E8M0FNU)
+ if (Sem == &APFloatBase::semFloat8E8M0FNU)
return initFromFloat8E8M0FNUAPInt(api);
- if (Sem == &semFloat6E3M2FN)
+ if (Sem == &APFloatBase::semFloat6E3M2FN)
return initFromFloat6E3M2FNAPInt(api);
- if (Sem == &semFloat6E2M3FN)
+ if (Sem == &APFloatBase::semFloat6E2M3FN)
return initFromFloat6E2M3FNAPInt(api);
- if (Sem == &semFloat4E2M1FN)
+ if (Sem == &APFloatBase::semFloat4E2M1FN)
return initFromFloat4E2M1FNAPInt(api);
llvm_unreachable("unsupported semantics");
@@ -4202,11 +4182,11 @@ IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
}
IEEEFloat::IEEEFloat(float f) {
- initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
+ initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
}
IEEEFloat::IEEEFloat(double d) {
- initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
+ initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
}
namespace {
@@ -4815,38 +4795,40 @@ IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
: Semantics(&S),
- Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
+ APFloat(APFloatBase::semIEEEdouble)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
- : Semantics(&S),
- Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
- APFloat(semIEEEdouble, uninitialized)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ : Semantics(&S), Floats(new APFloat[2]{
+ APFloat(APFloatBase::semIEEEdouble, uninitialized),
+ APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
- : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
- APFloat(semIEEEdouble)}) {
- assert(Semantics == &semPPCDoubleDouble);
+ : Semantics(&S),
+ Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
+ APFloat(APFloatBase::semIEEEdouble)}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
: Semantics(&S),
Floats(new APFloat[2]{
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
- assert(Semantics == &semPPCDoubleDouble);
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
APFloat &&Second)
: Semantics(&S),
Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
- assert(Semantics == &semPPCDoubleDouble);
- assert(&Floats[0].getSemantics() == &semIEEEdouble);
- assert(&Floats[1].getSemantics() == &semIEEEdouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
+ assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
}
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
@@ -4854,14 +4836,14 @@ DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
APFloat(RHS.Floats[1])}
: nullptr) {
- assert(Semantics == &semPPCDoubleDouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
: Semantics(RHS.Semantics), Floats(RHS.Floats) {
- RHS.Semantics = &semBogus;
+ RHS.Semantics = &APFloatBase::semBogus;
RHS.Floats = nullptr;
- assert(Semantics == &semPPCDoubleDouble);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble);
}
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
@@ -5009,12 +4991,12 @@ APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
CC(RHS.Floats[1]);
- assert(&A.getSemantics() == &semIEEEdouble);
- assert(&AA.getSemantics() == &semIEEEdouble);
- assert(&C.getSemantics() == &semIEEEdouble);
- assert(&CC.getSemantics() == &semIEEEdouble);
- assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
- assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
+ assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
+ assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
return Out.addImpl(A, AA, C, CC, RM);
}
@@ -5119,28 +5101,32 @@ APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret =
- Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.divide(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret =
- Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.remainder(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.mod(
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
@@ -5148,17 +5134,21 @@ APFloat::opStatus
DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
const DoubleAPFloat &Addend,
APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
auto Ret = Tmp.fusedMultiplyAdd(
- APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
- APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
+ Multiplicand.bitcastToAPInt()),
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
+ RM);
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
const APFloat &Hi = getFirst();
const APFloat &Lo = getSecond();
@@ -5309,22 +5299,28 @@ void DoubleAPFloat::makeZero(bool Neg) {
}
void DoubleAPFloat::makeLargest(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
- Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ Floats[0] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
+ Floats[1] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
if (Neg)
changeSign();
}
void DoubleAPFloat::makeSmallest(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
Floats[0].makeSmallest(Neg);
Floats[1].makeZero(/* Neg = */ false);
}
void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ Floats[0] =
+ APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
if (Neg)
Floats[0].changeSign();
Floats[1].makeZero(/* Neg = */ false);
@@ -5355,7 +5351,8 @@ hash_code hash_value(const DoubleAPFloat &Arg) {
}
APInt DoubleAPFloat::bitcastToAPInt() const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
uint64_t Data[] = {
Floats[0].bitcastToAPInt().getRawData()[0],
Floats[1].bitcastToAPInt().getRawData()[0],
@@ -5365,10 +5362,11 @@ APInt DoubleAPFloat::bitcastToAPInt() const {
Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
roundingMode RM) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy);
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
auto Ret = Tmp.convertFromString(S, RM);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
return Ret;
}
@@ -5379,7 +5377,8 @@ Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
// nextUp must choose the smallest output > input that follows these rules.
// nextDown must choose the largest output < input that follows these rules.
APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
// nextDown(x) = -nextUp(-x)
if (nextDown) {
changeSign();
@@ -5481,7 +5480,8 @@ APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
roundingMode RM, bool *IsExact) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
// If Hi is not finite, or Lo is zero, the value is entirely represented
// by Hi. Delegate to the simpler single-APFloat conversion.
@@ -5761,8 +5761,9 @@ unsigned int DoubleAPFloat::convertToHexString(char *DST,
unsigned int HexDigits,
bool UpperCase,
roundingMode RM) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
.convertToHexString(DST, HexDigits, UpperCase, RM);
}
@@ -5799,7 +5800,8 @@ bool DoubleAPFloat::isLargest() const {
}
bool DoubleAPFloat::isInteger() const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
return Floats[0].isInteger() && Floats[1].isInteger();
}
@@ -5807,8 +5809,9 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
unsigned FormatPrecision,
unsigned FormatMaxPadding,
bool TruncateZero) const {
- assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
+ "Unexpected Semantics");
+ APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
.toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
}
@@ -5840,14 +5843,17 @@ int ilogb(const DoubleAPFloat &Arg) {
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
APFloat::roundingMode RM) {
- assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
+ assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
+ "Unexpected Semantics");
+ return DoubleAPFloat(APFloatBase::PPCDoubleDouble(),
+ scalbn(Arg.Floats[0], Exp, RM),
scalbn(Arg.Floats[1], Exp, RM));
}
DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
APFloat::roundingMode RM) {
- assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
+ "Unexpected Semantics");
// Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
// [1.0, 2.0).
@@ -5943,7 +5949,8 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
}
APFloat First = scalbn(Hi, -Exp, RM);
- return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
+ return DoubleAPFloat(APFloatBase::PPCDoubleDouble(), std::move(First),
+ std::move(Second));
}
} // namespace detail
@@ -5955,9 +5962,8 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
const fltSemantics& S = F.getSemantics();
- new (&Double)
- DoubleAPFloat(Semantics, APFloat(std::move(F), S),
- APFloat(semIEEEdouble));
+ new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
+ APFloat(APFloatBase::IEEEdouble()));
return;
}
llvm_unreachable("Unexpected semantics");
@@ -6065,8 +6071,9 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
return U.IEEE.convert(ToSemantics, RM, losesInfo);
if (usesLayout<IEEEFloat>(getSemantics()) &&
usesLayout<DoubleAPFloat>(ToSemantics)) {
- assert(&ToSemantics == &semPPCDoubleDouble);
- auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
+ assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
+ auto Ret =
+ U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
*this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
return Ret;
}
@@ -6113,13 +6120,15 @@ APFloat::opStatus APFloat::convertToInteger(APSInt &result,
}
double APFloat::convertToDouble() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
+ if (&getSemantics() ==
+ (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
return getIEEE().convertToDouble();
assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
"Float semantics is not representable by IEEEdouble");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToDouble();
@@ -6127,13 +6136,14 @@ double APFloat::convertToDouble() const {
#ifdef HAS_IEE754_FLOAT128
float128 APFloat::convertToQuad() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
+ if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
return getIEEE().convertToQuad();
assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
"Float semantics is not representable by IEEEquad");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToQuad();
@@ -6141,18 +6151,84 @@ float128 APFloat::convertToQuad() const {
#endif
float APFloat::convertToFloat() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
+ if (&getSemantics() ==
+ (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
return getIEEE().convertToFloat();
assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
"Float semantics is not representable by IEEEsingle");
APFloat Temp = *this;
bool LosesInfo;
- opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
+ opStatus St =
+ Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
(void)St;
return Temp.getIEEE().convertToFloat();
}
+APFloat::Storage::~Storage() {
+ if (usesLayout<IEEEFloat>(*semantics)) {
+ IEEE.~IEEEFloat();
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*semantics)) {
+ Double.~DoubleAPFloat();
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage::Storage(const APFloat::Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(RHS.IEEE);
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(RHS.Double);
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage::Storage(APFloat::Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(std::move(RHS.IEEE));
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(std::move(RHS.Double));
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = RHS.IEEE;
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = RHS.Double;
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(RHS);
+ }
+ return *this;
+}
+
+APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = std::move(RHS.IEEE);
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = std::move(RHS.Double);
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(std::move(RHS));
+ }
+ return *this;
+}
+
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
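
The Storage special members added above follow the standard tagged-union rule
of five: dispatch on the active layout, construct via placement new, and
rebuild through destroy-plus-placement-new when the layouts differ. A minimal
standalone sketch of the pattern (simplified stand-in types, not the real
APFloat::Storage):

    #include <new>

    struct Small { int V = 0; };
    struct Big { double Lo = 0, Hi = 0; };

    struct Storage {
      bool IsSmall;
      union { Small S; Big B; };

      Storage() : IsSmall(true), S() {}
      ~Storage() { IsSmall ? S.~Small() : B.~Big(); }
      Storage(const Storage &RHS) : IsSmall(RHS.IsSmall) {
        if (IsSmall)
          new (&S) Small(RHS.S);
        else
          new (&B) Big(RHS.B);
      }
      Storage &operator=(const Storage &RHS) {
        if (IsSmall && RHS.IsSmall) {
          S = RHS.S; // same layout: plain member assignment suffices
        } else if (!IsSmall && !RHS.IsSmall) {
          B = RHS.B;
        } else if (this != &RHS) {
          this->~Storage(); // layout changes: destroy, then rebuild in place
          new (this) Storage(RHS);
        }
        return *this;
      }
    };

The real class dispatches on usesLayout<IEEEFloat>/usesLayout<DoubleAPFloat>
instead of a boolean tag, but the assignment structure is the same.
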
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1b559a6..8ed4062 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -514,8 +514,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
- Legal);
+ setOperationAction({ISD::ABS, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
+ MVT::i32, Legal);
setOperationAction(
{ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index e0375ea..e3f3aba 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -892,6 +892,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// have EXEC as implicit destination. Issue a warning if encoding for
// vdst is not EXEC.
if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
+ MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
if (Bytes_[0] != ExecEncoding)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 979a8b0..4b22c68 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
+#include <array>
namespace llvm {
@@ -45,7 +46,7 @@ struct GCNRegPressure {
return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
}
- void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
+ void clear() { Value.fill(0); }
unsigned getNumRegs(RegKind Kind) const {
assert(Kind < TOTAL_KINDS);
@@ -127,9 +128,7 @@ struct GCNRegPressure {
bool less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
- bool operator==(const GCNRegPressure &O) const {
- return std::equal(&Value[0], &Value[ValueArraySize], O.Value);
- }
+ bool operator==(const GCNRegPressure &O) const { return Value == O.Value; }
bool operator!=(const GCNRegPressure &O) const {
return !(*this == O);
@@ -160,7 +159,7 @@ private:
  /// Pressure for all register kinds (first all regular register kinds, then
/// all tuple register kinds).
- unsigned Value[ValueArraySize];
+ std::array<unsigned, ValueArraySize> Value;
static unsigned getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI);
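
For reference, the value semantics the std::array switch buys, in isolation
(a toy example, not GCN code):

    #include <array>
    #include <cassert>

    int main() {
      constexpr unsigned ValueArraySize = 8; // stand-in for the real constant
      std::array<unsigned, ValueArraySize> A, B;
      A.fill(0); // replaces std::fill(&Value[0], &Value[ValueArraySize], 0)
      B.fill(0);
      assert(A == B); // replaces std::equal over raw pointers
      return 0;
    }
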
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2aa54c9..09ef6ac 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -45,6 +45,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
+  // 32-bit ABS is legal for all AMDGPU subtargets except R600, so expand it.
+ setOperationAction(ISD::ABS, MVT::i32, Expand);
+
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
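
With ISD::ABS marked Expand, the generic legalizer falls back to the usual
branch-free shift/add/xor sequence. A scalar model of that standard expansion
(a sketch of the technique, not the literal R600 output; assumes an
arithmetic right shift, which LLVM-supported hosts provide):

    #include <cassert>
    #include <cstdint>

    int32_t expandAbsI32(int32_t X) {
      int32_t Sign = X >> 31;   // 0 for X >= 0, -1 for X < 0
      return (X + Sign) ^ Sign; // add/xor conditionally negates, no branch
    }

    int main() {
      assert(expandAbsI32(-7) == 7);
      assert(expandAbsI32(42) == 42);
      return 0; // INT32_MIN overflows here, as it does for any 32-bit abs
    }
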
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d516330..50447f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9072,6 +9072,67 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
MachineOperand &Src1 = Inst.getOperand(2);
const DebugLoc &DL = Inst.getDebugLoc();
+ if (ST.useRealTrue16Insts()) {
+ Register SrcReg0, SrcReg1;
+ if (!Src0.isReg() || !RI.isVGPR(MRI, Src0.getReg())) {
+ SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg0).add(Src0);
+ } else {
+ SrcReg0 = Src0.getReg();
+ }
+
+ if (!Src1.isReg() || !RI.isVGPR(MRI, Src1.getReg())) {
+ SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg1).add(Src1);
+ } else {
+ SrcReg1 = Src1.getReg();
+ }
+
+    bool IsSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
+    bool IsSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
+
+ auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
+ switch (Inst.getOpcode()) {
+ case AMDGPU::S_PACK_LL_B32_B16:
+ NewMI
+ .addReg(SrcReg0, 0,
+                IsSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0,
+                IsSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::hi16);
+ break;
+ case AMDGPU::S_PACK_LH_B32_B16:
+ NewMI
+ .addReg(SrcReg0, 0,
+                IsSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::hi16);
+ break;
+ case AMDGPU::S_PACK_HL_B32_B16:
+ NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0,
+                IsSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
+ .addImm(AMDGPU::hi16);
+ break;
+ case AMDGPU::S_PACK_HH_B32_B16:
+ NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::lo16)
+ .addReg(SrcReg1, 0, AMDGPU::hi16)
+ .addImm(AMDGPU::hi16);
+ break;
+ default:
+ llvm_unreachable("unhandled s_pack_* instruction");
+ }
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+ return;
+ }
+
switch (Inst.getOpcode()) {
case AMDGPU::S_PACK_LL_B32_B16: {
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
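
For orientation on the lo16/hi16 subregister choices above, here is what the
four s_pack variants compute on plain 32-bit values (a hand-written model
derived from the opcode names, not generated from the ISA spec):

    #include <cassert>
    #include <cstdint>

    // Each variant picks the low or high half of each source and packs the
    // two halves into the result's low and high 16 bits, respectively.
    uint32_t packLL(uint32_t S0, uint32_t S1) { return (S0 & 0xFFFFu) | (S1 << 16); }
    uint32_t packLH(uint32_t S0, uint32_t S1) { return (S0 & 0xFFFFu) | (S1 & 0xFFFF0000u); }
    uint32_t packHL(uint32_t S0, uint32_t S1) { return (S0 >> 16) | (S1 << 16); }
    uint32_t packHH(uint32_t S0, uint32_t S1) { return (S0 >> 16) | (S1 & 0xFFFF0000u); }

    int main() {
      assert(packLL(0xAAAA1111u, 0xBBBB2222u) == 0x22221111u);
      assert(packHH(0xAAAA1111u, 0xBBBB2222u) == 0xBBBBAAAAu);
      return 0;
    }

The true16 path above builds the same results with a REG_SEQUENCE that
stitches lo16/hi16 subregisters together instead of shifting and masking.
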
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 01a40c1..7431e11 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -47,9 +47,6 @@ private:
const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
- // Check if the machine instruction being processed is a supported packed
- // instruction.
- bool isUnpackingSupportedInstr(MachineInstr &MI) const;
// Creates a list of packed instructions following an MFMA that are suitable
// for unpacking.
void collectUnpackingCandidates(MachineInstr &BeginMI,
@@ -454,23 +451,6 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}
-// If support is extended to new operations, add tests in
-// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
-bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
- if (!TII->isNeverCoissue(MI))
- return false;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AMDGPU::V_PK_ADD_F32:
- case AMDGPU::V_PK_MUL_F32:
- case AMDGPU::V_PK_FMA_F32:
- return true;
- default:
- return false;
- }
- llvm_unreachable("Fully covered switch");
-}
-
bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
unsigned OpCode = MI.getOpcode();
Register DstReg = MI.getOperand(0).getReg();
@@ -612,10 +592,13 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
for (auto I = std::next(BeginMI.getIterator()); I != E; ++I) {
MachineInstr &Instr = *I;
+ uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);
+    bool IsUnpackable =
+        UnpackedOpCode != std::numeric_limits<uint16_t>::max();
if (Instr.isMetaInstruction())
continue;
if ((Instr.isTerminator()) ||
- (TII->isNeverCoissue(Instr) && !isUnpackingSupportedInstr(Instr)) ||
+ (TII->isNeverCoissue(Instr) && !IsUnpackable) ||
(SIInstrInfo::modifiesModeRegister(Instr) &&
Instr.modifiesRegister(AMDGPU::EXEC, TRI)))
return;
@@ -639,7 +622,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TRI->regsOverlap(MFMADef, InstrMO.getReg()))
return;
}
- if (!isUnpackingSupportedInstr(Instr))
+ if (!IsUnpackable)
continue;
if (canUnpackingClobberRegister(Instr))
@@ -687,8 +670,8 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
bool IsHiBits) {
MachineBasicBlock &MBB = *I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
- const MachineOperand *SrcMO2 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
+ const MachineOperand *SrcMO0 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
+ const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
Register DstReg = I.getOperand(0).getReg();
unsigned OpCode = I.getOpcode();
Register UnpackedDstReg = IsHiBits ? TRI->getSubReg(DstReg, AMDGPU::sub1)
@@ -702,15 +685,15 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
MachineInstrBuilder NewMI = BuildMI(MBB, I, DL, TII->get(UnpackedOpcode));
NewMI.addDef(UnpackedDstReg); // vdst
- addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO1);
- addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO2);
+ addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO0);
+ addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO1);
if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
- const MachineOperand *SrcMO3 =
+ const MachineOperand *SrcMO2 =
TII->getNamedOperand(I, AMDGPU::OpName::src2);
unsigned Src2Mods =
TII->getNamedOperand(I, AMDGPU::OpName::src2_modifiers)->getImm();
- addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO3);
+ addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO2);
}
NewMI.addImm(ClampVal); // clamp
  // Packed instructions do not support output modifiers; safe to assign them 0
@@ -787,9 +770,13 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
// TODO: Fold this into previous block, if possible. Evaluate and handle any
// side effects.
+
+  // Perform the extra MF scans only for supported architectures.
+ if (!ST.hasGFX940Insts())
+ return Changed;
for (MachineBasicBlock &MBB : MF) {
- // Unpack packed instructions overlapped by MFMAs. This allows the compiler
- // to co-issue unpacked instructions with MFMA
+ // Unpack packed instructions overlapped by MFMAs. This allows the
+ // compiler to co-issue unpacked instructions with MFMA
auto SchedModel = TII->getSchedModel();
SetVector<MachineInstr *> InstrsToUnpack;
for (auto &MI : make_early_inc_range(MBB.instrs())) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index eb87558..169465e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24830,7 +24830,8 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// instruction, as it is usually smaller than the alternative sequence.
// TODO: Add vector division?
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
- return OptSize && !VT.isVector();
+ return OptSize && !VT.isVector() &&
+ VT.getSizeInBits() <= getMaxDivRemBitWidthSupported();
}
bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 1b7cb9b..636e31c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -699,7 +699,8 @@ public:
"Can't encode VTYPE for uninitialized or unknown");
if (TWiden != 0)
return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
- return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic,
+ AltFmt);
}
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ddb53a2..12f776b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3775,11 +3775,13 @@ std::string RISCVInstrInfo::createMIROperandComment(
#define CASE_VFMA_OPCODE_VV(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VV, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP##_ALT, VFPR16, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
@@ -4003,11 +4005,13 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP##_ALT, NEWOP##_ALT, VFPR16, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
// clang-format on
@@ -4469,6 +4473,20 @@ bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const {
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
+
+#define CASE_FP_WIDEOP_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16)
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(OP) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)
// clang-format on
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
@@ -4478,6 +4496,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return nullptr;
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWADD_ALT_WV):
+ case CASE_FP_WIDEOP_OPCODE_LMULS_ALT(FWSUB_ALT_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWADD_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS(FWSUB_WV): {
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
@@ -4494,6 +4514,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
llvm_unreachable("Unexpected opcode");
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWADD_ALT_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_ALT(FWSUB_ALT_WV)
}
// clang-format on
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index c9c1246..9358486 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -44,6 +44,336 @@ let Predicates = [HasStdExtZvfbfmin] in {
let mayRaiseFPException = true, Predicates = [HasStdExtZvfbfwma] in
defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM;
+defset list<VTypeInfoToWide> AllWidenableIntToBF16Vectors = {
+ def : VTypeInfoToWide<VI8MF8, VBF16MF4>;
+ def : VTypeInfoToWide<VI8MF4, VBF16MF2>;
+ def : VTypeInfoToWide<VI8MF2, VBF16M1>;
+ def : VTypeInfoToWide<VI8M1, VBF16M2>;
+ def : VTypeInfoToWide<VI8M2, VBF16M4>;
+ def : VTypeInfoToWide<VI8M4, VBF16M8>;
+}
+
+multiclass VPseudoVALU_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVALU_VF_RM_BF16 {
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFWALU_WV_WF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_WV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_WF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVFMUL_VV_VF_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryFV_VV_RM<m, 16/*sew*/>,
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, f, f.SEW>,
+ SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWMUL_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=16>,
+ SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVMAC_VV_VF_AAXA_RM_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoTernaryV_VV_AAXA_RM<m, 16/*sew*/>,
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, f.SEW>,
+ SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ "ReadVFMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVWMAC_VV_VF_RM_BF16 {
+ foreach m = MxListFW in {
+ defm "" : VPseudoTernaryW_VV_RM<m, sew=16>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX, 16/*sew*/>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxListFW in {
+ defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>,
+ SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX, f.SEW>;
+ }
+}
+
+multiclass VPseudoVRCP_V_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVRCP_V_RM_BF16 {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E16"
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ def "_V_" # mx # "_E16_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, 16/*sew*/,
+ forcePassthruRead=true>;
+ }
+ }
+}
+
+multiclass VPseudoVMAX_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFMinMaxV", "ReadVFMinMaxV", "ReadVFMinMaxV",
+ m.MX, 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFMinMaxF", "ReadVFMinMaxV", "ReadVFMinMaxF",
+ m.MX, f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVSGNJ_VV_VF_BF16 {
+ foreach m = MxListF in {
+ defm "" : VPseudoBinaryV_VV<m, sew=16>,
+ SchedBinary<"WriteVFSgnjV", "ReadVFSgnjV", "ReadVFSgnjV", m.MX,
+ 16/*sew*/, forcePassthruRead=true>;
+ }
+
+ defvar f = SCALAR_F16;
+ foreach m = f.MxList in {
+ defm "" : VPseudoBinaryV_VF<m, f, f.SEW>,
+ SchedBinary<"WriteVFSgnjF", "ReadVFSgnjV", "ReadVFSgnjF", m.MX,
+ f.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVWCVTF_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=8,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 8/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVWCVTD_V_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint, sew=16,
+ TargetConstraintType=3>,
+ SchedUnary<"WriteVFWCvtFToFV", "ReadVFWCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+multiclass VPseudoVNCVTD_W_RM_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m,
+ constraint, sew=16,
+ TargetConstraintType=2>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16/*sew*/,
+ forcePassthruRead=true>;
+}
+
+let Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT in {
+let mayRaiseFPException = true in {
+defm PseudoVFADD_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFSUB_ALT : VPseudoVALU_VV_VF_RM_BF16;
+defm PseudoVFRSUB_ALT : VPseudoVALU_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWADD_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_VV_VF_RM_BF16;
+defm PseudoVFWADD_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+defm PseudoVFWSUB_ALT : VPseudoVFWALU_WV_WF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFMUL_ALT : VPseudoVFMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFWMUL_ALT : VPseudoVWMUL_VV_VF_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMACC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSAC_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMADD_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+defm PseudoVFNMSUB_ALT : VPseudoVMAC_VV_VF_AAXA_RM_BF16;
+}
+
+let mayRaiseFPException = true in {
+defm PseudoVFWMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMACC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+defm PseudoVFWNMSAC_ALT : VPseudoVWMAC_VV_VF_RM_BF16;
+}
+
+let mayRaiseFPException = true in
+defm PseudoVFRSQRT7_ALT : VPseudoVRCP_V_BF16;
+
+let mayRaiseFPException = true in
+defm PseudoVFREC7_ALT : VPseudoVRCP_V_RM_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVFMIN_ALT : VPseudoVMAX_VV_VF_BF16;
+defm PseudoVFMAX_ALT : VPseudoVMAX_VV_VF_BF16;
+}
+
+defm PseudoVFSGNJ_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJN_ALT : VPseudoVSGNJ_VV_VF_BF16;
+defm PseudoVFSGNJX_ALT : VPseudoVSGNJ_VV_VF_BF16;
+
+let mayRaiseFPException = true in {
+defm PseudoVMFEQ_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFNE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLT_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLE_ALT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFGT_ALT : VPseudoVCMPM_VF;
+defm PseudoVMFGE_ALT : VPseudoVCMPM_VF;
+}
+
+defm PseudoVFCLASS_ALT : VPseudoVCLS_V;
+
+defm PseudoVFMERGE_ALT : VPseudoVMRG_FM;
+
+defm PseudoVFMV_V_ALT : VPseudoVMV_F;
+
+let mayRaiseFPException = true in {
+defm PseudoVFWCVT_F_XU_ALT : VPseudoVWCVTF_V_BF16;
+defm PseudoVFWCVT_F_X_ALT : VPseudoVWCVTF_V_BF16;
+
+defm PseudoVFWCVT_F_F_ALT : VPseudoVWCVTD_V_BF16;
+} // mayRaiseFPException = true
+
+let mayRaiseFPException = true in {
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFNCVT_XU_F_ALT : VPseudoVNCVTI_W_RM;
+defm PseudoVFNCVT_X_F_ALT : VPseudoVNCVTI_W_RM;
+}
+
+defm PseudoVFNCVT_RTZ_XU_F_ALT : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_X_F_ALT : VPseudoVNCVTI_W;
+
+defm PseudoVFNCVT_F_F_ALT : VPseudoVNCVTD_W_RM_BF16;
+
+defm PseudoVFNCVT_ROD_F_F_ALT : VPseudoVNCVTD_W_BF16;
+} // mayRaiseFPException = true
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ defvar f = SCALAR_F16;
+ let HasSEWOp = 1, BaseInstr = VFMV_F_S in
+ def "PseudoVFMV_" # f.FX # "_S_ALT" :
+ RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>,
+ Sched<[WriteVMovFS, ReadVMovFS]>;
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
+ Constraints = "$rd = $passthru" in
+ def "PseudoVFMV_S_" # f.FX # "_ALT" :
+ RISCVVPseudo<(outs VR:$rd),
+ (ins VR:$passthru, f.fprclass:$rs1, AVL:$vl, sew:$sew)>,
+ Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
+}
+
+defm PseudoVFSLIDE1UP_ALT : VPseudoVSLD1_VF<"@earlyclobber $rd">;
+defm PseudoVFSLIDE1DOWN_ALT : VPseudoVSLD1_VF;
+} // Predicates = [HasStdExtZvfbfa], AltFmtType = IS_ALTFMT
+
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
@@ -108,6 +438,130 @@ let Predicates = [HasStdExtZvfbfmin] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
+
+ defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>;
+ defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllBF16Vectors, uimm5>;
+ defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
+ eew=16, vtilist=AllBF16Vectors>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>;
+ defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>;
+
+ foreach fvti = AllBF16Vectors in {
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
+ fvti.Vector,
+ fvti.Vector, fvti.Vector, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.RegClass>;
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+ "V"#fvti.ScalarSuffix#"M",
+ fvti.Vector,
+ fvti.Vector, fvti.Scalar, fvti.Mask,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass,
+ fvti.RegClass, fvti.ScalarRegClass>;
+ defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru),
+ (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Scalar (fpimm0)),
+ (fvti.Mask VMV0:$vm), VLOpFrag)),
+ (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ fvti.RegClass:$passthru,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vv_vl fvti.RegClass:$rs2,
+ (ivti.Vector fvti.RegClass:$rs1),
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector
+ (riscv_vrgather_vx_vl fvti.RegClass:$rs2,
+ uimm5:$imm,
+ fvti.RegClass:$passthru,
+ (fvti.Mask VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK")
+ fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm,
+ (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
let Predicates = [HasStdExtZvfbfwma] in {
@@ -118,3 +572,224 @@ let Predicates = [HasStdExtZvfbfwma] in {
defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16",
AllWidenableBF16ToFloatVectors>;
}
+
+multiclass VPatConversionVI_VF_BF16<string intrinsic, string instruction> {
+ foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionWF_VI_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionWF_VF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypeMinimalPredicates<fvti>.Predicates,
+ GetVTypeMinimalPredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass, isSEWAware>;
+ }
+}
+
+multiclass VPatConversionVI_WF_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVI_WF_RM_BF16<string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToBF16Vectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionRoundingMode<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WF_BF16<string intrinsic, string instruction,
+ bit isSEWAware = 0> {
+ foreach fvtiToFWti = AllWidenableBF16ToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversion<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
+ }
+}
+
+let Predicates = [HasStdExtZvfbfa] in {
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB_ALT",
+ AllBF16Vectors, isSEWAware = 1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmin", "PseudoVFMIN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfmax", "PseudoVFMAX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnj", "PseudoVFSGNJ_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjn", "PseudoVFSGNJN_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vfsgnjx", "PseudoVFSGNJX_ALT",
+ AllBF16Vectors, isSEWAware=1>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfeq", "PseudoVMFEQ_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfle", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmflt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VV_VX<"int_riscv_vmfne", "PseudoVMFNE_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfgt", "PseudoVMFGT_ALT", AllBF16Vectors>;
+defm : VPatBinaryM_VX<"int_riscv_vmfge", "PseudoVMFGE_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfgt", "PseudoVMFLT_ALT", AllBF16Vectors>;
+defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE_ALT", AllBF16Vectors>;
+defm : VPatConversionVI_VF_BF16<"int_riscv_vfclass", "PseudoVFCLASS_ALT">;
+foreach vti = AllBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE_ALT",
+ "V"#vti.ScalarSuffix#"M",
+ vti.Vector,
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+}
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VI_BF16<"int_riscv_vfwcvt_f_x_v", "PseudoVFWCVT_F_X_ALT",
+ isSEWAware=1>;
+defm : VPatConversionWF_VF_BF16<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F_ALT">;
+defm : VPatConversionVI_WF_RM_BF16<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F_ALT">;
+defm : VPatConversionVI_WF_BF16<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F_ALT">;
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F_ALT",
+ AllWidenableBF16ToFloatVectors, isSEWAware=1>;
+defm : VPatConversionVF_WF_BF16<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F_ALT",
+ isSEWAware=1>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP_ALT", AllBF16Vectors>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN_ALT", AllBF16Vectors>;
+
+foreach fvti = AllBF16Vectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = GetVTypePredicates<ivti>.Predicates in {
+ // 13.16. Vector Floating-Point Move Instruction
+ // If we're splatting fpimm0, use vmv.v.x vd, x0.
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
+ $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_V_ALT_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX)
+ $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ }
+}
+
+foreach vti = NoGroupBF16Vectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (fpimm0)),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
+ VLOpFrag)),
+ (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
+ vti.ScalarRegClass:$rs1,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_ALT")
+ vti.RegClass:$passthru,
+ (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+ }
+
+ defvar vfmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
+ vti.ScalarSuffix,
+ "_S_ALT"));
+ // Only pattern-match extract-element operations where the index is 0. Any
+ // other index will have been custom-lowered to slide the vector correctly
+ // into place.
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
+ (vfmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
+}
+} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 6acf799..334db4b 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -288,9 +288,12 @@ public:
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
bool hasVInstructionsF16Minimal() const { return HasStdExtZvfhmin; }
bool hasVInstructionsF16() const { return HasStdExtZvfh; }
- bool hasVInstructionsBF16Minimal() const { return HasStdExtZvfbfmin; }
+ bool hasVInstructionsBF16Minimal() const {
+ return HasStdExtZvfbfmin || HasStdExtZvfbfa;
+ }
bool hasVInstructionsF32() const { return HasStdExtZve32f; }
bool hasVInstructionsF64() const { return HasStdExtZve64d; }
+ bool hasVInstructionsBF16() const { return HasStdExtZvfbfa; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 6760f5a..61a0bbe 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1200,6 +1200,23 @@ void addOpAccessChainReqs(const MachineInstr &Instr,
return;
}
+ bool IsNonUniform =
+ hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI);
+
+  Register FirstIndexReg = Instr.getOperand(3).getReg();
+ bool FirstIndexIsConstant =
+ Subtarget.getInstrInfo()->isConstantInstr(*MRI.getVRegDef(FirstIndexReg));
+
+  if (StorageClass == SPIRV::StorageClass::StorageBuffer) {
+ if (IsNonUniform)
+ Handler.addRequirements(
+ SPIRV::Capability::StorageBufferArrayNonUniformIndexingEXT);
+ else if (!FirstIndexIsConstant)
+ Handler.addRequirements(
+ SPIRV::Capability::StorageBufferArrayDynamicIndexing);
+ return;
+ }
+
Register PointeeTypeReg = ResTypeInst->getOperand(2).getReg();
MachineInstr *PointeeType = MRI.getUniqueVRegDef(PointeeTypeReg);
if (PointeeType->getOpcode() != SPIRV::OpTypeImage &&
@@ -1208,27 +1225,25 @@ void addOpAccessChainReqs(const MachineInstr &Instr,
return;
}
- bool IsNonUniform =
- hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI);
if (isUniformTexelBuffer(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::UniformTexelBufferArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::UniformTexelBufferArrayDynamicIndexingEXT);
} else if (isInputAttachment(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::InputAttachmentArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::InputAttachmentArrayDynamicIndexingEXT);
} else if (isStorageTexelBuffer(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::StorageTexelBufferArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::StorageTexelBufferArrayDynamicIndexingEXT);
} else if (isSampledImage(PointeeType) ||
@@ -1237,14 +1252,14 @@ void addOpAccessChainReqs(const MachineInstr &Instr,
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::SampledImageArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::SampledImageArrayDynamicIndexing);
} else if (isStorageImage(PointeeType)) {
if (IsNonUniform)
Handler.addRequirements(
SPIRV::Capability::StorageImageArrayNonUniformIndexingEXT);
- else
+ else if (!FirstIndexIsConstant)
Handler.addRequirements(
SPIRV::Capability::StorageImageArrayDynamicIndexing);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2feee05..b05d7c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44813,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
case X86ISD::PCMPGT:
// icmp sgt(0, R) == ashr(R, BitWidth-1).
- // iff we only need the sign bit then we can use R directly.
- if (OriginalDemandedBits.isSignMask() &&
- ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) {
+      // iff we only need the sign bit then we can use R directly.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op.getOperand(1));
+      // Otherwise we just need R's sign bit for the comparison.
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ }
break;
case X86ISD::MOVMSK: {
SDValue Src = Op.getOperand(0);
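
The identity behind this combine, checked on one scalar lane (illustrative
only; PCMPGT operates per vector lane, and the snippet assumes an arithmetic
right shift):

    #include <cassert>
    #include <cstdint>

    int32_t cmpGtZero(int32_t R) { return 0 > R ? -1 : 0; } // pcmpgt(0, R), one lane
    int32_t ashr31(int32_t R) { return R >> 31; }           // ashr(R, BitWidth - 1)

    int main() {
      for (int32_t R : {INT32_MIN, -2, -1, 0, 1, INT32_MAX})
        assert(cmpGtZero(R) == ashr31(R)); // depends only on R's sign bit
      return 0;
    }

Since both sides agree for every R and depend only on R's sign bit, asking
SimplifyDemandedBits for just the sign mask of operand 1 is sound.
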
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1d2cd39..5c23f91 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10809,39 +10809,27 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
if (!ST.hasSSE1())
return;
- // PXOR is safe to use because it doesn't affect flags.
- BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ BuildMI(MBB, Iter, DL, get(X86::V_SET0), Reg);
} else if (X86::VR256RegClass.contains(Reg)) {
// YMM#
if (!ST.hasAVX())
return;
- // VPXOR is safe to use because it doesn't affect flags.
- BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ BuildMI(MBB, Iter, DL, get(X86::AVX_SET0), Reg);
} else if (X86::VR512RegClass.contains(Reg)) {
// ZMM#
if (!ST.hasAVX512())
return;
- // VPXORY is safe to use because it doesn't affect flags.
- BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ BuildMI(MBB, Iter, DL, get(X86::AVX512_512_SET0), Reg);
} else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg)) {
if (!ST.hasVLX())
return;
- // KXOR is safe to use because it doesn't affect flags.
- unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk;
- BuildMI(MBB, Iter, DL, get(Op), Reg)
- .addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ unsigned Op = ST.hasBWI() ? X86::KSET0Q : X86::KSET0W;
+ BuildMI(MBB, Iter, DL, get(Op), Reg);
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 09cb225..a8eb9b9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3757,6 +3757,10 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
// (x < y) ? -1 : zext(x > y)
// (x > y) ? 1 : sext(x != y)
// (x > y) ? 1 : sext(x < y)
+// (x == y) ? 0 : (x > y ? 1 : -1)
+// (x == y) ? 0 : (x < y ? -1 : 1)
+// Special case: x == C ? 0 : (x > C - 1 ? 1 : -1)
+// Special case: x == C ? 0 : (x < C + 1 ? -1 : 1)
// Into ucmp/scmp(x, y), where signedness is determined by the signedness
// of the comparison in the original sequence.
Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
@@ -3849,6 +3853,44 @@ Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
}
}
+ // Special cases with constants: x == C ? 0 : (x > C-1 ? 1 : -1)
+ if (Pred == ICmpInst::ICMP_EQ && match(TV, m_Zero())) {
+ const APInt *C;
+ if (match(RHS, m_APInt(C))) {
+ CmpPredicate InnerPred;
+ Value *InnerRHS;
+ const APInt *InnerTV, *InnerFV;
+ if (match(FV,
+ m_Select(m_ICmp(InnerPred, m_Specific(LHS), m_Value(InnerRHS)),
+ m_APInt(InnerTV), m_APInt(InnerFV)))) {
+
+ // x == C ? 0 : (x > C-1 ? 1 : -1)
+ if (ICmpInst::isGT(InnerPred) && InnerTV->isOne() &&
+ InnerFV->isAllOnes()) {
+ IsSigned = ICmpInst::isSigned(InnerPred);
+ bool CanSubOne = IsSigned ? !C->isMinSignedValue() : !C->isMinValue();
+ if (CanSubOne) {
+ APInt Cminus1 = *C - 1;
+ if (match(InnerRHS, m_SpecificInt(Cminus1)))
+ Replace = true;
+ }
+ }
+
+ // x == C ? 0 : (x < C+1 ? -1 : 1)
+ if (ICmpInst::isLT(InnerPred) && InnerTV->isAllOnes() &&
+ InnerFV->isOne()) {
+ IsSigned = ICmpInst::isSigned(InnerPred);
+ bool CanAddOne = IsSigned ? !C->isMaxSignedValue() : !C->isMaxValue();
+ if (CanAddOne) {
+ APInt Cplus1 = *C + 1;
+ if (match(InnerRHS, m_SpecificInt(Cplus1)))
+ Replace = true;
+ }
+ }
+ }
+ }
+ }
+
Intrinsic::ID IID = IsSigned ? Intrinsic::scmp : Intrinsic::ucmp;
if (Replace)
return replaceInstUsesWith(
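
A worked instance of the new constant special case, using a hypothetical
C = 5 with a signed compare: under x != 5, x > 4 is equivalent to x > 5, so
the select chain is exactly scmp(x, 5):

    #include <cassert>
    #include <cstdint>

    int scmp(int64_t X, int64_t Y) { return (X > Y) - (X < Y); } // llvm.scmp result
    int selectChain(int64_t X) { return X == 5 ? 0 : (X > 4 ? 1 : -1); }

    int main() {
      for (int64_t X : {-1, 3, 4, 5, 6, 100})
        assert(selectChain(X) == scmp(X, 5));
      return 0;
    }

The isMinSignedValue/isMaxValue guards exist because forming C - 1 or C + 1
would wrap at the type's extremes, invalidating the equivalence.
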
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6e17801..2646334 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -844,6 +844,7 @@ struct AddressSanitizer {
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool maybeInsertDynamicShadowAtFunctionEntry(Function &F);
void markEscapedLocalAllocas(Function &F);
+ void markCatchParametersAsUninteresting(Function &F);
private:
friend struct FunctionStackPoisoner;
@@ -2997,6 +2998,22 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
}
}
}
+
+// Mitigation for https://github.com/google/sanitizers/issues/749
+// We don't instrument Windows catch-block parameters to avoid
+// interfering with exception handling assumptions.
+void AddressSanitizer::markCatchParametersAsUninteresting(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CatchPad = dyn_cast<CatchPadInst>(&I)) {
+ // Mark the parameters to a catch-block as uninteresting to avoid
+ // instrumenting them.
+ for (Value *Operand : CatchPad->arg_operands())
+ if (auto *AI = dyn_cast<AllocaInst>(Operand))
+ ProcessedAllocas[AI] = false;
+ }
+ }
+ }
+}
bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) {
bool ShouldInstrument =
@@ -3041,6 +3058,9 @@ bool AddressSanitizer::instrumentFunction(Function &F,
// can be passed to that intrinsic.
markEscapedLocalAllocas(F);
+ if (TargetTriple.isOSWindows())
+ markCatchParametersAsUninteresting(F);
+
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
SmallPtrSet<Value *, 16> TempsToInstrument;
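
The shape of code this targets, as a hypothetical repro: under Windows C++ EH
the catch object below becomes an alloca referenced from a catchpad, and it
is that alloca whose instrumentation the mitigation now skips.

    #include <cstdio>
    #include <stdexcept>

    static void mayThrow() { throw std::runtime_error("boom"); }

    int main() {
      try {
        mayThrow();
      } catch (const std::runtime_error &E) { // catch parameter: left alone
        std::puts(E.what());
      }
      return 0;
    }
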
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 7da8586..d827e64 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,7 +8,6 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -217,9 +216,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
// Get the analysis results needed by loop passes.
MemorySSA *MSSA =
UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
- BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
- ? (&AM.getResult<BlockFrequencyAnalysis>(F))
- : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -227,7 +223,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<ScalarEvolutionAnalysis>(F),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
- BFI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index ed4e2b1..3487e81 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -97,6 +97,12 @@ static cl::opt<MatrixLayoutTy> MatrixLayout(
static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
cl::init(false));
+static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
+ "matrix-split-matmul-remainder-over-threshold", cl::Hidden,
+ cl::desc("Illegal remainder vectors over this size in bits should be split "
+ "in the inner loop of matmul"),
+ cl::init(0));
+
/// Helper function to either return Scope, if it is a subprogram or the
/// attached subprogram for a local scope.
static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -1720,6 +1726,31 @@ public:
ToRemove.push_back(MatMul);
}
+  /// Given \p Remainder iterations of the matmul inner loop, potentially
+  /// lower the \p BlockSize that is used for the underlying vector.
+ unsigned capBlockSize(unsigned BlockSize, unsigned Remainder, Type *EltType) {
+ if (BlockSize <= Remainder)
+ return BlockSize;
+
+    // If the remainder is also a legal type, just use it.
+ auto *VecTy = FixedVectorType::get(EltType, Remainder);
+ if (TTI.isTypeLegal(VecTy))
+ return Remainder;
+
+    // Similarly, use the remainder if the vector is already small enough
+    // that we don't want to split it further.
+ if (VecTy->getPrimitiveSizeInBits() <= SplitMatmulRemainderOverThreshold)
+ return Remainder;
+
+ // Gradually lower the vectorization factor to cover the
+ // remainder.
+ do {
+ BlockSize /= 2;
+ } while (BlockSize > Remainder);
+ return BlockSize;
+ }
+
/// Compute \p Result += \p A * \p B for input matrices with left-associating
/// addition.
///
@@ -1757,10 +1788,8 @@ public:
bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));
for (unsigned I = 0; I < R; I += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (I + BlockSize > R)
- BlockSize /= 2;
-
+ // Lower block size to make sure we stay within bounds.
+ BlockSize = capBlockSize(BlockSize, R - I, Result.getElementType());
Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
: nullptr;
for (unsigned K = 0; K < M; ++K) {
@@ -1785,9 +1814,8 @@ public:
unsigned BlockSize = VF;
bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
for (unsigned J = 0; J < C; J += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (J + BlockSize > C)
- BlockSize /= 2;
+ // Lower the vectorization factor to cover the remainder.
+ BlockSize = capBlockSize(BlockSize, C - J, Result.getElementType());
Value *Sum = nullptr;
for (unsigned K = 0; K < M; ++K) {
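
A small self-contained sketch of the capping policy that capBlockSize implements above, with TTI.isTypeLegal and the bit-size threshold replaced by stand-ins (isLegalVectorWidth and SplitThresholdElts are assumptions for illustration, counted in elements rather than bits):

#include <cassert>

// Stand-ins for TTI.isTypeLegal and the bit-size threshold from the cl::opt;
// both are assumptions for illustration, counted in elements rather than bits.
static bool isLegalVectorWidth(unsigned NumElts) {
  return NumElts == 2 || NumElts == 4;
}
static const unsigned SplitThresholdElts = 3;

// Mirrors the capping policy: keep BlockSize when it fits, otherwise use the
// remainder directly if it is legal or small, otherwise halve until it fits.
static unsigned capBlockSize(unsigned BlockSize, unsigned Remainder) {
  if (BlockSize <= Remainder)
    return BlockSize;
  if (isLegalVectorWidth(Remainder) || Remainder <= SplitThresholdElts)
    return Remainder;
  do {
    BlockSize /= 2;
  } while (BlockSize > Remainder);
  return BlockSize;
}

int main() {
  assert(capBlockSize(8, 8) == 8); // fits, unchanged
  assert(capBlockSize(8, 4) == 4); // remainder itself is legal
  assert(capBlockSize(8, 3) == 3); // small remainder, not worth splitting
  assert(capBlockSize(8, 5) == 4); // halve 8 -> 4 to stay within bounds
  return 0;
}

Compared with the removed while loop, the remainder is used directly when it already forms a legal or sufficiently small vector, avoiding an unnecessary split into power-of-two pieces.
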
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d..5af6c96 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, AAResults &AA,
TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial, ScalarEvolution *SE,
- MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
+ MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (F->hasOptSize())
return false;
- // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
- // of the loops L is nested in, and of the loops nested in L are all cold.
- auto IsLoopNestCold = [&](const Loop *L) {
- // Check L and all of its parent loops.
- auto *Parent = L;
- while (Parent) {
- if (!PSI->isColdBlock(Parent->getHeader(), BFI))
- return false;
- Parent = Parent->getParentLoop();
- }
- // Next check all loops nested within L.
- SmallVector<const Loop *, 4> Worklist;
- llvm::append_range(Worklist, L->getSubLoops());
- while (!Worklist.empty()) {
- auto *CurLoop = Worklist.pop_back_val();
- if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
- return false;
- llvm::append_range(Worklist, CurLoop->getSubLoops());
- }
- return true;
- };
-
- // Skip cold loops in cold loop nests, as unswitching them brings little
- // benefit but increases the code size
- if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
- LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
- return false;
- }
-
// Perform legality checks.
if (!isSafeForNoNTrivialUnswitching(L, LI))
return false;
@@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
(void)F;
- ProfileSummaryInfo *PSI = nullptr;
- if (auto OuterProxy =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
- .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
- PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
@@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
+ &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 9693ae6..4947d03 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -634,18 +635,10 @@ private:
/// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV
/// changes.
bool mergeInValue(ValueLatticeElement &IV, Value *V,
- ValueLatticeElement MergeWithV,
+ const ValueLatticeElement &MergeWithV,
ValueLatticeElement::MergeOptions Opts = {
/*MayIncludeUndef=*/false, /*CheckWiden=*/false});
- bool mergeInValue(Value *V, ValueLatticeElement MergeWithV,
- ValueLatticeElement::MergeOptions Opts = {
- /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
- assert(!V->getType()->isStructTy() &&
- "non-structs should use markConstant");
- return mergeInValue(ValueState[V], V, MergeWithV, Opts);
- }
-
/// getValueState - Return the ValueLatticeElement object that corresponds to
/// the value. This function handles the case when the value hasn't been seen
/// yet by properly seeding constants etc.
@@ -768,6 +761,7 @@ private:
void handleCallArguments(CallBase &CB);
void handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO, unsigned Idx);
+ bool isInstFullyOverDefined(Instruction &Inst);
private:
friend class InstVisitor<SCCPInstVisitor>;
@@ -987,7 +981,7 @@ public:
void trackValueOfArgument(Argument *A) {
if (A->getType()->isStructTy())
return (void)markOverdefined(A);
- mergeInValue(A, getArgAttributeVL(A));
+ mergeInValue(ValueState[A], A, getArgAttributeVL(A));
}
bool isStructLatticeConstant(Function *F, StructType *STy);
@@ -1128,8 +1122,7 @@ bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
assert(It != TrackedMultipleRetVals.end());
- ValueLatticeElement LV = It->second;
- if (!SCCPSolver::isConstant(LV))
+ if (!SCCPSolver::isConstant(It->second))
return false;
}
return true;
@@ -1160,7 +1153,7 @@ Constant *SCCPInstVisitor::getConstantOrNull(Value *V) const {
std::vector<Constant *> ConstVals;
auto *ST = cast<StructType>(V->getType());
for (unsigned I = 0, E = ST->getNumElements(); I != E; ++I) {
- ValueLatticeElement LV = LVs[I];
+ const ValueLatticeElement &LV = LVs[I];
ConstVals.push_back(SCCPSolver::isConstant(LV)
? getConstant(LV, ST->getElementType(I))
: UndefValue::get(ST->getElementType(I)));
@@ -1225,7 +1218,7 @@ void SCCPInstVisitor::visitInstruction(Instruction &I) {
}
bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V,
- ValueLatticeElement MergeWithV,
+ const ValueLatticeElement &MergeWithV,
ValueLatticeElement::MergeOptions Opts) {
if (IV.mergeIn(MergeWithV, Opts)) {
pushUsersToWorkList(V);
@@ -1264,7 +1257,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
- ValueLatticeElement BCValue = getValueState(BI->getCondition());
+ const ValueLatticeElement &BCValue = getValueState(BI->getCondition());
ConstantInt *CI = getConstantInt(BCValue, BI->getCondition()->getType());
if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
@@ -1326,7 +1319,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
// the target as executable.
if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
// Casts are folded by visitCastInst.
- ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
+ const ValueLatticeElement &IBRValue = getValueState(IBR->getAddress());
BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(
getConstant(IBRValue, IBR->getAddress()->getType()));
if (!Addr) { // Overdefined or unknown condition?
@@ -1383,49 +1376,66 @@ bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
// 7. If a conditional branch has a value that is overdefined, make all
// successors executable.
void SCCPInstVisitor::visitPHINode(PHINode &PN) {
- // If this PN returns a struct, just mark the result overdefined.
- // TODO: We could do a lot better than this if code actually uses this.
- if (PN.getType()->isStructTy())
- return (void)markOverdefined(&PN);
-
- if (getValueState(&PN).isOverdefined())
- return; // Quick exit
-
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
if (PN.getNumIncomingValues() > 64)
return (void)markOverdefined(&PN);
- unsigned NumActiveIncoming = 0;
+ if (isInstFullyOverDefined(PN))
+ return;
+ SmallVector<unsigned> FeasibleIncomingIndices;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+ FeasibleIncomingIndices.push_back(i);
+ }
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
// constant. If they are constant and don't agree, the PHI is a constant
// range. If there are no executable operands, the PHI remains unknown.
- ValueLatticeElement PhiState = getValueState(&PN);
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
- continue;
-
- ValueLatticeElement IV = getValueState(PN.getIncomingValue(i));
- PhiState.mergeIn(IV);
- NumActiveIncoming++;
- if (PhiState.isOverdefined())
- break;
+ if (StructType *STy = dyn_cast<StructType>(PN.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement PhiState = getStructValueState(&PN, i);
+ if (PhiState.isOverdefined())
+ continue;
+ for (unsigned j : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV =
+ getStructValueState(PN.getIncomingValue(j), i);
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ ValueLatticeElement &PhiStateRef = getStructValueState(&PN, i);
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
+ }
+ } else {
+ ValueLatticeElement PhiState = getValueState(&PN);
+ for (unsigned i : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV = getValueState(PN.getIncomingValue(i));
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ // We allow up to 1 range extension per active incoming value and one
+ // additional extension. Note that we manually adjust the number of range
+ // extensions to match the number of active incoming values. This helps to
+ // limit multiple extensions caused by the same incoming value, if other
+ // incoming values are equal.
+ ValueLatticeElement &PhiStateRef = ValueState[&PN];
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
}
-
- // We allow up to 1 range extension per active incoming value and one
- // additional extension. Note that we manually adjust the number of range
- // extensions to match the number of active incoming values. This helps to
- // limit multiple extensions caused by the same incoming value, if other
- // incoming values are equal.
- mergeInValue(&PN, PhiState,
- ValueLatticeElement::MergeOptions().setMaxWidenSteps(
- NumActiveIncoming + 1));
- ValueLatticeElement &PhiStateRef = getValueState(&PN);
- PhiStateRef.setNumRangeExtensions(
- std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions()));
}
void SCCPInstVisitor::visitReturnInst(ReturnInst &I) {
@@ -1481,7 +1491,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
}
}
- ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ const ValueLatticeElement &OpSt = getValueState(I.getOperand(0));
if (OpSt.isUnknownOrUndef())
return;
@@ -1496,9 +1506,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (I.getDestTy()->isIntOrIntVectorTy() &&
I.getSrcTy()->isIntOrIntVectorTy() &&
I.getOpcode() != Instruction::BitCast) {
- auto &LV = getValueState(&I);
ConstantRange OpRange =
OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false);
+ auto &LV = getValueState(&I);
Type *DestTy = I.getDestTy();
ConstantRange Res = ConstantRange::getEmpty(DestTy->getScalarSizeInBits());
@@ -1516,19 +1526,24 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO,
unsigned Idx) {
Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
- ValueLatticeElement L = getValueState(LHS);
- ValueLatticeElement R = getValueState(RHS);
+ Type *Ty = LHS->getType();
+
addAdditionalUser(LHS, &EVI);
addAdditionalUser(RHS, &EVI);
- if (L.isUnknownOrUndef() || R.isUnknownOrUndef())
- return; // Wait to resolve.
- Type *Ty = LHS->getType();
+ const ValueLatticeElement &L = getValueState(LHS);
+ if (L.isUnknownOrUndef())
+ return; // Wait to resolve.
ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false);
+
+ const ValueLatticeElement &R = getValueState(RHS);
+ if (R.isUnknownOrUndef())
+ return; // Wait to resolve.
+
ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false);
if (Idx == 0) {
ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR);
- mergeInValue(&EVI, ValueLatticeElement::getRange(Res));
+ mergeInValue(ValueState[&EVI], &EVI, ValueLatticeElement::getRange(Res));
} else {
assert(Idx == 1 && "Index can only be 0 or 1");
ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
@@ -1560,7 +1575,7 @@ void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
if (auto *WO = dyn_cast<WithOverflowInst>(AggVal))
return handleExtractOfWithOverflow(EVI, WO, i);
ValueLatticeElement EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ mergeInValue(ValueState[&EVI], &EVI, EltVal);
} else {
// Otherwise, must be extracting from an array.
return (void)markOverdefined(&EVI);
@@ -1616,14 +1631,18 @@ void SCCPInstVisitor::visitSelectInst(SelectInst &I) {
if (ValueState[&I].isOverdefined())
return (void)markOverdefined(&I);
- ValueLatticeElement CondValue = getValueState(I.getCondition());
+ const ValueLatticeElement &CondValue = getValueState(I.getCondition());
if (CondValue.isUnknownOrUndef())
return;
if (ConstantInt *CondCB =
getConstantInt(CondValue, I.getCondition()->getType())) {
Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
- mergeInValue(&I, getValueState(OpVal));
+ const ValueLatticeElement &OpValState = getValueState(OpVal);
+    // Safety: accessing ValueState[&I] cannot invalidate OpValState, because
+    // &I is already in the map, so no insertion (and rehashing) takes place.
+ assert(ValueState.contains(&I) && "&I is not in ValueState map.");
+ mergeInValue(ValueState[&I], &I, OpValState);
return;
}
@@ -1721,7 +1740,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
// being a special floating value.
ValueLatticeElement NewV;
NewV.markConstant(C, /*MayIncludeUndef=*/true);
- return (void)mergeInValue(&I, NewV);
+ return (void)mergeInValue(ValueState[&I], &I, NewV);
}
}
@@ -1741,7 +1760,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
R = A.overflowingBinaryOp(BO->getOpcode(), B, OBO->getNoWrapKind());
else
R = A.binaryOp(BO->getOpcode(), B);
- mergeInValue(&I, ValueLatticeElement::getRange(R));
+ mergeInValue(ValueState[&I], &I, ValueLatticeElement::getRange(R));
// TODO: Currently we do not exploit special values that produce something
// better than overdefined with an overdefined operand for vector or floating
@@ -1767,7 +1786,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
if (C) {
ValueLatticeElement CV;
CV.markConstant(C);
- mergeInValue(&I, CV);
+ mergeInValue(ValueState[&I], &I, CV);
return;
}
@@ -1802,7 +1821,7 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
Operands.reserve(I.getNumOperands());
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- ValueLatticeElement State = getValueState(I.getOperand(i));
+ const ValueLatticeElement &State = getValueState(I.getOperand(i));
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
@@ -1881,14 +1900,13 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
if (ValueState[&I].isOverdefined())
return (void)markOverdefined(&I);
- ValueLatticeElement PtrVal = getValueState(I.getOperand(0));
+ const ValueLatticeElement &PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUnknownOrUndef())
return; // The pointer is not resolved yet!
- ValueLatticeElement &IV = ValueState[&I];
-
if (SCCPSolver::isConstant(PtrVal)) {
Constant *Ptr = getConstant(PtrVal, I.getOperand(0)->getType());
+ ValueLatticeElement &IV = ValueState[&I];
// load null is undefined.
if (isa<ConstantPointerNull>(Ptr)) {
@@ -1916,7 +1934,7 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
}
// Fall back to metadata.
- mergeInValue(&I, getValueFromMetadata(&I));
+ mergeInValue(ValueState[&I], &I, getValueFromMetadata(&I));
}
void SCCPInstVisitor::visitCallBase(CallBase &CB) {
@@ -1944,7 +1962,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
return markOverdefined(&CB); // Can't handle struct args.
if (A.get()->getType()->isMetadataTy())
continue; // Carried in CB, not allowed in Operands.
- ValueLatticeElement State = getValueState(A);
+ const ValueLatticeElement &State = getValueState(A);
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
@@ -1964,7 +1982,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
}
// Fall back to metadata.
- mergeInValue(&CB, getValueFromMetadata(&CB));
+ mergeInValue(ValueState[&CB], &CB, getValueFromMetadata(&CB));
}
void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
@@ -1992,10 +2010,11 @@ void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
getMaxWidenStepsOpts());
}
- } else
- mergeInValue(&*AI,
- getValueState(*CAI).intersect(getArgAttributeVL(&*AI)),
- getMaxWidenStepsOpts());
+ } else {
+ ValueLatticeElement CallArg =
+ getValueState(*CAI).intersect(getArgAttributeVL(&*AI));
+ mergeInValue(ValueState[&*AI], &*AI, CallArg, getMaxWidenStepsOpts());
+ }
}
}
}
@@ -2076,7 +2095,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
if (II->getIntrinsicID() == Intrinsic::vscale) {
unsigned BitWidth = CB.getType()->getScalarSizeInBits();
const ConstantRange Result = getVScaleRange(II->getFunction(), BitWidth);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
}
if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
@@ -2094,7 +2114,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
ConstantRange Result =
ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
- return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ return (void)mergeInValue(ValueState[II], II,
+ ValueLatticeElement::getRange(Result));
}
}
@@ -2121,10 +2142,25 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return handleCallOverdefined(CB); // Not tracking this callee.
// If so, propagate the return value of the callee into this call result.
- mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts());
+ mergeInValue(ValueState[&CB], &CB, TFRVI->second, getMaxWidenStepsOpts());
}
}
+bool SCCPInstVisitor::isInstFullyOverDefined(Instruction &Inst) {
+  // For struct types, each member is handled separately: the struct value is
+  // only considered fully overdefined if every member is overdefined.
+ if (StructType *STy = dyn_cast<StructType>(Inst.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) {
+ if (!getStructValueState(&Inst, i).isOverdefined())
+ return false;
+ }
+ return true;
+ }
+
+ return getValueState(&Inst).isOverdefined();
+}
+
void SCCPInstVisitor::solve() {
// Process the work lists until they are empty!
while (!BBWorkList.empty() || !InstWorkList.empty()) {
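
A toy sketch of the PHI merge that the rewritten visitPHINode performs: only feasible incoming edges are merged, the loop stops as soon as the state is overdefined, and for struct PHIs the same loop runs once per element. The three-state lattice below is a deliberate simplification of ValueLatticeElement (which also tracks constant ranges and widening steps); all names are illustrative:

#include <cassert>
#include <vector>

// Simplified lattice: unknown (bottom), a known constant, or overdefined (top).
struct Lattice {
  enum Kind { Unknown, Constant, Overdefined } K = Unknown;
  int Val = 0;

  // Merge another state into this one; returns true if *this changed.
  bool mergeIn(const Lattice &Other) {
    if (Other.K == Unknown || K == Overdefined)
      return false;
    if (K == Unknown) { *this = Other; return true; }
    if (Other.K == Overdefined || (Other.K == Constant && Other.Val != Val)) {
      K = Overdefined;
      return true;
    }
    return false;
  }
};

// Merge only the feasible incoming states, stopping early on overdefined,
// mirroring the loop over FeasibleIncomingIndices in visitPHINode.
static Lattice mergePhi(const std::vector<Lattice> &Incoming,
                        const std::vector<bool> &Feasible) {
  Lattice State; // starts Unknown
  for (size_t I = 0; I < Incoming.size(); ++I) {
    if (!Feasible[I])
      continue;
    State.mergeIn(Incoming[I]);
    if (State.K == Lattice::Overdefined)
      break;
  }
  return State;
}

int main() {
  Lattice C5; C5.K = Lattice::Constant; C5.Val = 5;
  Lattice C7; C7.K = Lattice::Constant; C7.Val = 7;
  // Two feasible edges agreeing on 5 (the third edge is infeasible): constant.
  assert(mergePhi({C5, C5, C7}, {true, true, false}).K == Lattice::Constant);
  // Disagreeing constants on feasible edges: the PHI becomes overdefined.
  assert(mergePhi({C5, C7}, {true, true}).K == Lattice::Overdefined);
  return 0;
}

The MaxWidenSteps cap of FeasibleIncomingIndices.size() + 1 in the real code additionally limits how often the merged range may be widened, which this simplified lattice does not model.
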
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b62c8f1..9cd52da 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2242,8 +2242,49 @@ public:
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const;
+
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with constant stride).
+ ///
+ /// TODO:
+ /// It is possible that the load gets "widened". Suppose that originally each
+ /// load loads `k` bytes and `PointerOps` can be arranged as follows (`%s` is
+  /// constant):
+  /// %b + 0 * %s + 0
+  /// %b + 0 * %s + 1
+  /// %b + 0 * %s + 2
+ /// ...
+ /// %b + 0 * %s + (w - 1)
+ ///
+ /// %b + 1 * %s + 0
+ /// %b + 1 * %s + 1
+ /// %b + 1 * %s + 2
+ /// ...
+ /// %b + 1 * %s + (w - 1)
+ /// ...
+ ///
+ /// %b + (n - 1) * %s + 0
+ /// %b + (n - 1) * %s + 1
+ /// %b + (n - 1) * %s + 2
+ /// ...
+ /// %b + (n - 1) * %s + (w - 1)
+ ///
+ /// In this case we will generate a strided load of type `<n x (k * w)>`.
+ ///
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ElemTy original scalar type of loads.
+ /// \param Alignment alignment of the first load.
+  /// \param SortedIndices is the order of PointerOps as returned by
+  /// `sortPtrAccesses`.
+  /// \param Diff Pointer difference between the lowest and the highest pointer
+  /// in `PointerOps` as returned by `getPointersDiff`.
+  /// \param Ptr0 first pointer in `PointerOps`.
+  /// \param PtrN last pointer in `PointerOps`.
+  /// \param SPtrInfo If the function returns `true`, it also sets all the fields
+ /// of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ElemTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const;
/// Return true if an array of scalar loads can be replaced with a strided
/// load (with run-time stride).
@@ -6849,9 +6890,8 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = PointerOps.size();
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const {
if (Diff % (Sz - 1) != 0)
return false;
@@ -6875,27 +6915,40 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
+ return true;
+ }
+ return false;
+}
- // Iterate through all pointers and check if all distances are
- // unique multiple of Dist.
- SmallSet<int64_t, 4> Dists;
- for (Value *Ptr : PointerOps) {
- int64_t Dist = 0;
- if (Ptr == PtrN)
- Dist = Diff;
- else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
- // If the strides are not the same or repeated, we can't
- // vectorize.
- if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
- break;
- }
- if (Dists.size() == Sz) {
- Type *StrideTy = DL->getIndexType(Ptr0->getType());
- SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
- SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
- return true;
- }
+bool BoUpSLP::analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ScalarTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
+ if (!isStridedLoad(PointerOps, ScalarTy, Alignment, Diff, Sz))
+ return false;
+
+ int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
+
+  // Iterate through all pointers and check that each distance is a
+  // unique multiple of Stride.
+ SmallSet<int64_t, 4> Dists;
+ for (Value *Ptr : PointerOps) {
+ int64_t Dist = 0;
+ if (Ptr == PtrN)
+ Dist = Diff;
+ else if (Ptr != Ptr0)
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+    // If a distance is not a multiple of Stride, or is repeated, we can't
+    // emit a strided load for this group.
+ if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+ break;
+ }
+ if (Dists.size() == Sz) {
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
+ SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
+ SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
+ return true;
}
return false;
}
@@ -6995,8 +7048,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
->getAlign();
- if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
- SPtrInfo))
+ if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, Alignment, Order,
+ *Diff, Ptr0, PtrN, SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
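
A standalone sketch of the distance check that analyzeConstantStrideCandidate performs after isStridedLoad accepts the group: every pointer's offset from the first pointer must be a distinct multiple of the common stride. The helper below works on plain integer offsets and is only an illustration (isUniqueStridedGroup is a made-up name):

#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

// True if every offset is a distinct multiple of Stride, so the whole group
// can be emitted as a single strided load; mirrors the Dists check above.
static bool isUniqueStridedGroup(const std::vector<int64_t> &Offsets,
                                 int64_t Stride) {
  std::set<int64_t> Seen;
  for (int64_t Off : Offsets)
    if (Off % Stride != 0 || !Seen.insert(Off).second)
      return false;
  return true;
}

int main() {
  assert(isUniqueStridedGroup({0, 16, 32, 48}, 16));  // valid strided group
  assert(!isUniqueStridedGroup({0, 16, 24, 48}, 16)); // 24 not a multiple
  assert(!isUniqueStridedGroup({0, 16, 16, 48}, 16)); // repeated offset
  return 0;
}
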
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0e0b042..84d2ea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -407,6 +407,10 @@ public:
VPBasicBlock *getParent() { return Parent; }
const VPBasicBlock *getParent() const { return Parent; }
+ /// \return the VPRegionBlock which the recipe belongs to.
+ VPRegionBlock *getRegion();
+ const VPRegionBlock *getRegion() const;
+
/// The method which generates the output IR instructions that correspond to
/// this VPRecipe, thereby "executing" the VPlan.
virtual void execute(VPTransformState &State) = 0;
@@ -4075,6 +4079,14 @@ public:
}
};
+inline VPRegionBlock *VPRecipeBase::getRegion() {
+ return getParent()->getParent();
+}
+
+inline const VPRegionBlock *VPRecipeBase::getRegion() const {
+ return getParent()->getParent();
+}
+
 /// VPlan models a candidate for vectorization, encoding various decisions taken
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
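
The new getRegion accessors are declared inside VPRecipeBase but defined inline near the end of the header because their body calls VPBasicBlock::getParent, and VPBasicBlock is only forward-declared where VPRecipeBase is defined. A minimal sketch of that C++ pattern with made-up names (Recipe, Block, Region):

// Made-up types standing in for VPRecipeBase / VPBasicBlock / VPRegionBlock.
struct Block;   // forward declaration, like VPBasicBlock at that point
struct Region;  // forward declaration, like VPRegionBlock

struct Recipe {
  Block *Parent = nullptr;
  Block *getParent() { return Parent; }
  // Declared here, but the body needs Block to be a complete type.
  Region *getRegion();
};

struct Region {};

struct Block {
  Region *Enclosing = nullptr;
  Region *getParent() { return Enclosing; }
};

// Block is complete now, so the one-liner can be defined inline, mirroring
// the out-of-class definitions added to VPlan.h.
inline Region *Recipe::getRegion() { return getParent()->getParent(); }

int main() {
  Region R;
  Block B;
  B.Enclosing = &R;
  Recipe Rec;
  Rec.Parent = &B;
  return Rec.getRegion() == &R ? 0 : 1;
}
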
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index f413c63..7e074c1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -377,7 +377,7 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
#ifndef NDEBUG
auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
- auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
+ VPRegionBlock *Region = R->getRegion();
if (Region && Region->isReplicator()) {
assert(Region->getNumSuccessors() == 1 &&
Region->getNumPredecessors() == 1 && "Expected SESE region!");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7a98c75..d1e67e6b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2352,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return false;
auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *CanIV = getParent()->getParent()->getCanonicalIV();
+ auto *CanIV = getRegion()->getCanonicalIV();
return StartC && StartC->isZero() && StepC && StepC->isOne() &&
getScalarType() == CanIV->getScalarType();
}
@@ -3076,7 +3076,7 @@ static void scalarizeInstruction(const Instruction *Instr,
State.AC->registerAssumption(II);
assert(
- (RepRecipe->getParent()->getParent() ||
+ (RepRecipe->getRegion() ||
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
all_of(RepRecipe->operands(),
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
@@ -3268,7 +3268,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
to_vector(operands()), VF);
// If the recipe is not predicated (i.e. not in a replicate region), return
// the scalar cost. Otherwise handle predicated cost.
- if (!getParent()->getParent()->isReplicator())
+ if (!getRegion()->isReplicator())
return ScalarCost;
// Account for the phi nodes that we will create.
@@ -3284,7 +3284,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
case Instruction::Store: {
// TODO: See getMemInstScalarizationCost for how to handle replicating and
// predicated cases.
- const VPRegionBlock *ParentRegion = getParent()->getParent();
+ const VPRegionBlock *ParentRegion = getRegion();
if (ParentRegion && ParentRegion->isReplicator())
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cae9aee8..f5f616f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1858,8 +1858,8 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
return nullptr;
VPRegionBlock *EnclosingLoopRegion =
HoistCandidate->getParent()->getEnclosingLoopRegion();
- assert((!HoistCandidate->getParent()->getParent() ||
- HoistCandidate->getParent()->getParent() == EnclosingLoopRegion) &&
+ assert((!HoistCandidate->getRegion() ||
+ HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
// Hoist candidate was already visited, no need to hoist.
if (!Visited.insert(HoistCandidate).second)
@@ -2898,7 +2898,7 @@ void VPlanTransforms::replaceSymbolicStrides(
// evolution.
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
auto *R = cast<VPRecipeBase>(&U);
- return R->getParent()->getParent() ||
+ return R->getRegion() ||
R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
};
ValueToSCEVMapTy RewriteMap;
@@ -3803,8 +3803,7 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
- VPRegionBlock *ParentRegion =
- cast<VPRecipeBase>(U)->getParent()->getParent();
+ VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
if ((isa<VPReplicateRecipe>(DefR) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index cf95ac0..9a2497e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -64,7 +64,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
return true;
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
- const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
+ const VPRegionBlock *RegionOfR = Rep->getRegion();
// Don't consider recipes in replicate regions as uniform yet; their first
// lane cannot be accessed when executing the replicate region for other
// lanes.