Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 12
-rw-r--r-- llvm/lib/CodeGen/MachineSink.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/RegisterPressure.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 26
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r-- llvm/lib/IR/AsmWriter.cpp | 12
-rw-r--r-- llvm/lib/IR/ConstantFPRange.cpp | 19
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 5
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.h | 3
-rw-r--r-- llvm/lib/Target/Mips/MipsISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/Mips/MipsISelLowering.h | 3
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 33
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 21
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 77
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 21
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 46
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 8
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 19
-rw-r--r-- llvm/lib/Target/SPIRV/CMakeLists.txt | 1
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCombine.td | 4
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp | 60
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h | 38
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 82
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp | 77
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 4
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h | 3
-rw-r--r-- llvm/lib/TargetParser/Triple.cpp | 3
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 14
-rw-r--r-- llvm/lib/Transforms/Scalar/SROA.cpp | 8
-rw-r--r-- llvm/lib/Transforms/Utils/InstructionNamer.cpp | 7
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 19
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 61
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 3
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 9
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 12
40 files changed, 494 insertions(+), 243 deletions(-)
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0ca55a26..54e916e 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -118,6 +118,10 @@ static cl::opt<bool>
#endif
cl::desc(""));
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-uselistorder", cl::Hidden, cl::init(true),
+ cl::desc("Preserve use-list order when writing LLVM bitcode."));
+
namespace llvm {
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
}
@@ -217,7 +221,10 @@ public:
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index)
: BitcodeWriterBase(Stream, StrtabBuilder), M(M),
- VE(M, ShouldPreserveUseListOrder), Index(Index) {
+ VE(M, PreserveBitcodeUseListOrder.getNumOccurrences()
+ ? PreserveBitcodeUseListOrder
+ : ShouldPreserveUseListOrder),
+ Index(Index) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
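The pattern introduced here (and repeated in AsmWriter.cpp below) is a command-line override that only kicks in when the flag is explicitly given: cl::opt::getNumOccurrences() distinguishes a flag left at its default from one passed on the command line, so the API-supplied value still wins otherwise. A minimal sketch of the idiom, with hypothetical names:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> ExampleOverride(
        "example-override", llvm::cl::Hidden, llvm::cl::init(true),
        llvm::cl::desc("Hypothetical flag illustrating the idiom."));

    static bool resolveSetting(bool APIValue) {
      // getNumOccurrences() is nonzero only if the flag appeared on the
      // command line; otherwise defer to the value supplied via the API.
      return ExampleOverride.getNumOccurrences() ? ExampleOverride : APIValue;
    }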
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index aa078f3..e40fb76 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -704,9 +704,17 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
}
+// FIXME: change callsites to use the new DW_LNAME_ language codes.
llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const {
- return static_cast<llvm::dwarf::SourceLanguage>(
- getLanguage().getUnversionedName());
+ const auto &Lang = getLanguage();
+
+ if (!Lang.hasVersionedName())
+ return static_cast<llvm::dwarf::SourceLanguage>(Lang.getName());
+
+ return llvm::dwarf::toDW_LANG(
+ static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+ Lang.getVersion())
+ .value_or(llvm::dwarf::DW_LANG_hi_user);
}
std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
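The new path handles DWARF's versioned language encoding: when the language is carried as a (DW_LNAME_*, version) pair, llvm::dwarf::toDW_LANG() folds it back to a classic DW_LANG_* code, with DW_LANG_hi_user as the fallback when no classic code exists. A hedged sketch of that mapping step (the concrete version value below is an illustrative assumption):

    #include "llvm/BinaryFormat/Dwarf.h"

    llvm::dwarf::SourceLanguage
    classicLang(llvm::dwarf::SourceLanguageName Name, uint32_t Version) {
      // e.g. a versioned C++ name with Version 201402 would be expected to
      // fold to DW_LANG_C_plus_plus_14; unmapped pairs fall back to hi_user.
      return llvm::dwarf::toDW_LANG(Name, Version)
          .value_or(llvm::dwarf::DW_LANG_hi_user);
    }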
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index d5153b7..cdcb29d9 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -1209,7 +1209,7 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
MIE = MBB.instr_begin();
MII != MIE; --MII) {
const MachineInstr &MI = *std::prev(MII);
- if (MI.isDebugInstr() || MI.isPseudoProbe())
+ if (MI.isDebugOrPseudoInstr())
continue;
RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, *MRI, false, false);
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 5f37890..7d4674b 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -858,7 +858,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
void RegPressureTracker::recede(SmallVectorImpl<VRegMaskOrUnit> *LiveUses) {
recedeSkipDebugValues();
- if (CurrPos->isDebugInstr() || CurrPos->isPseudoProbe()) {
+ if (CurrPos->isDebugOrPseudoInstr()) {
// It's possible to only have debug_value and pseudo probe instructions and
// hit the start of the block.
assert(CurrPos == MBB->begin());
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b47274b..b23b190 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10628,7 +10628,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// folding this will increase the total number of instructions.
if (N0.getOpcode() == ISD::SRL &&
(N0.getOperand(1) == N1 || N0.hasOneUse()) &&
- TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ TLI.shouldFoldConstantShiftPairToMask(N)) {
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
@@ -11207,7 +11207,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
// (and (srl x, (sub c2, c1), MASK)
if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
- TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ TLI.shouldFoldConstantShiftPairToMask(N)) {
auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
const APInt &LHSC = LHS->getAPIntValue();
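Both call sites guard the same rewrite: a pair of constant shifts collapses into a shift plus an AND mask, and the target hook (which no longer takes the CombineLevel) may veto it when the shift pair is cheaper. For equal shift amounts the identity is plain low-bit masking; a worked i8 example with c1 = c2 = 3:

    #include <cstdint>

    // (srl (shl x, 3), 3) on i8 keeps the low 5 bits of x:
    uint8_t viaShiftPair(uint8_t X) { return (uint8_t)(X << 3) >> 3; }
    uint8_t viaMask(uint8_t X) { return X & 0x1f; } // equivalent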
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b5f8a61..437d0f4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3313,7 +3313,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break;
// Unary FP Operations
- case ISD::FABS:
case ISD::FACOS:
case ISD::FASIN:
case ISD::FATAN:
@@ -3329,7 +3328,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG2:
case ISD::FLOG10:
case ISD::FNEARBYINT:
- case ISD::FNEG:
case ISD::FREEZE:
case ISD::FRINT:
case ISD::FROUND:
@@ -3341,6 +3339,12 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FTAN:
case ISD::FTANH:
case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
+ case ISD::FABS:
+ R = SoftPromoteHalfRes_FABS(N);
+ break;
+ case ISD::FNEG:
+ R = SoftPromoteHalfRes_FNEG(N);
+ break;
case ISD::AssertNoFPClass:
R = SoftPromoteHalfRes_AssertNoFPClass(N);
break;
@@ -3670,6 +3674,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FABS(SDNode *N) {
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Clear the sign bit.
+ return DAG.getNode(ISD::AND, dl, MVT::i16, Op,
+ DAG.getConstant(0x7fff, dl, MVT::i16));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FNEG(SDNode *N) {
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Invert the sign bit.
+ return DAG.getNode(ISD::XOR, dl, MVT::i16, Op,
+ DAG.getConstant(0x8000, dl, MVT::i16));
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_AssertNoFPClass(SDNode *N) {
return GetSoftPromotedHalf(N->getOperand(0));
}
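Splitting FABS and FNEG out of the shared unary path avoids a pointless round-trip: SoftPromoteHalfRes_UnaryOp extends the i16 payload to f32, performs the operation there, and rounds back, whereas fabs and fneg are pure sign-bit manipulations on the IEEE-754 binary16 encoding (sign in bit 15). The same trick on raw half bits, as a standalone sketch:

    #include <cstdint>

    uint16_t fabsHalfBits(uint16_t Bits) { return Bits & 0x7fff; } // clear sign
    uint16_t fnegHalfBits(uint16_t Bits) { return Bits ^ 0x8000; } // flip sign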
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d580ce0..603dc34 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -832,6 +832,8 @@ private:
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_FABS(SDNode *N);
+ SDValue SoftPromoteHalfRes_FNEG(SDNode *N);
SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N);
SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 0bc877d..2430d98 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -102,6 +102,10 @@ static cl::opt<bool> PrintProfData(
"print-prof-data", cl::Hidden,
cl::desc("Pretty print perf data (branch weights, etc) when dumping"));
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+ "preserve-ll-uselistorder", cl::Hidden, cl::init(false),
+ cl::desc("Preserve use-list order when writing LLVM assembly."));
+
// Make virtual table appear in this compilation unit.
AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default;
@@ -2939,7 +2943,10 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
bool IsForDebug, bool ShouldPreserveUseListOrder)
: Out(o), TheModule(M), Machine(Mac), TypePrinter(M), AnnotationWriter(AAW),
IsForDebug(IsForDebug),
- ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
+ ShouldPreserveUseListOrder(
+ PreserveAssemblyUseListOrder.getNumOccurrences()
+ ? PreserveAssemblyUseListOrder
+ : ShouldPreserveUseListOrder) {
if (!TheModule)
return;
for (const GlobalObject &GO : TheModule->global_objects())
@@ -2950,7 +2957,8 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug)
: Out(o), TheIndex(Index), Machine(Mac), TypePrinter(/*Module=*/nullptr),
- IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false) {}
+ IsForDebug(IsForDebug),
+ ShouldPreserveUseListOrder(PreserveAssemblyUseListOrder) {}
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (!Operand) {
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
index 2477e22..070e833 100644
--- a/llvm/lib/IR/ConstantFPRange.cpp
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -326,6 +326,8 @@ std::optional<bool> ConstantFPRange::getSignBit() const {
}
bool ConstantFPRange::operator==(const ConstantFPRange &CR) const {
+ assert(&getSemantics() == &CR.getSemantics() &&
+ "Should only use the same semantics");
if (MayBeSNaN != CR.MayBeSNaN || MayBeQNaN != CR.MayBeQNaN)
return false;
return Lower.bitwiseIsEqual(CR.Lower) && Upper.bitwiseIsEqual(CR.Upper);
@@ -425,3 +427,20 @@ ConstantFPRange ConstantFPRange::getWithoutInf() const {
return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN,
MayBeSNaN);
}
+
+ConstantFPRange ConstantFPRange::cast(const fltSemantics &DstSem,
+ APFloat::roundingMode RM) const {
+ bool LosesInfo;
+ APFloat NewLower = Lower;
+ APFloat NewUpper = Upper;
+ // Be conservative: return the full range if the conversion is invalid.
+ if (NewLower.convert(DstSem, RM, &LosesInfo) == APFloat::opInvalidOp ||
+ NewLower.isNaN())
+ return getFull(DstSem);
+ if (NewUpper.convert(DstSem, RM, &LosesInfo) == APFloat::opInvalidOp ||
+ NewUpper.isNaN())
+ return getFull(DstSem);
+ return ConstantFPRange(std::move(NewLower), std::move(NewUpper),
+ /*MayBeQNaNVal=*/MayBeQNaN || MayBeSNaN,
+ /*MayBeSNaNVal=*/false);
+}
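The new cast() leans on APFloat::convert(), which returns opInvalidOp when a bound has no meaningful image in the destination semantics; in that case, or when a bound converts to NaN, the code conservatively answers with the full range. Conversion can also quiet a signaling NaN, which is why MayBeSNaN is folded into MayBeQNaN and cleared in the result. A small sketch of the underlying conversion call:

    #include "llvm/ADT/APFloat.h"
    using llvm::APFloat;

    // Convert a bound down to IEEE single; the status reports whether the
    // cast was exact, inexact, or invalid for the destination semantics.
    APFloat::opStatus convertBound(APFloat &Bound) {
      bool LosesInfo = false;
      return Bound.convert(APFloat::IEEEsingle(),
                           APFloat::rmNearestTiesToEven, &LosesInfo);
    }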
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7294f3e..fbce3b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18640,7 +18640,7 @@ bool AArch64TargetLowering::isDesirableToCommuteXorWithShift(
}
bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
- const SDNode *N, CombineLevel Level) const {
+ const SDNode *N) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e472e7d..00956fd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -300,8 +300,7 @@ public:
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
/// Return true if it is profitable to fold a pair of shifts into a mask.
- bool shouldFoldConstantShiftPairToMask(const SDNode *N,
- CombineLevel Level) const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override;
/// Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 83c7def..67ea2dd 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13816,7 +13816,7 @@ bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
}
bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
- const SDNode *N, CombineLevel Level) const {
+ const SDNode *N) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
@@ -13826,7 +13826,8 @@ bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
if (!Subtarget->isThumb1Only())
return true;
- if (Level == BeforeLegalizeTypes)
+ EVT VT = N->getValueType(0);
+ if (VT.getScalarSizeInBits() > 32)
return true;
return false;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 26ff54c..70aa001 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -772,8 +772,7 @@ class VectorType;
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
- bool shouldFoldConstantShiftPairToMask(const SDNode *N,
- CombineLevel Level) const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override;
/// Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index b05de49..7f1ff45 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1306,7 +1306,7 @@ bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
}
bool MipsTargetLowering::shouldFoldConstantShiftPairToMask(
- const SDNode *N, CombineLevel Level) const {
+ const SDNode *N) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index c65c76c..25a0bf9 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -290,8 +290,7 @@ class TargetRegisterClass;
bool isCheapToSpeculateCttz(Type *Ty) const override;
bool isCheapToSpeculateCtlz(Type *Ty) const override;
bool hasBitTest(SDValue X, SDValue Y) const override;
- bool shouldFoldConstantShiftPairToMask(const SDNode *N,
- CombineLevel Level) const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override;
/// Return the register type for a given MVT, ensuring vectors are treated
/// as a series of gpr sized integers.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f692180..944a1e2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -585,6 +585,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ // Custom handling for PowerPC ucmp instruction
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
+
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
@@ -12618,6 +12622,33 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
}
+// Lower unsigned 3-way compare producing -1/0/1.
+SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue A = DAG.getFreeze(Op.getOperand(0));
+ SDValue B = DAG.getFreeze(Op.getOperand(1));
+ EVT OpVT = A.getValueType(); // operand type
+ EVT ResVT = Op.getValueType(); // result type
+
+ // First compute diff = A - B (will become subf).
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
+
+ // Generate B - A using SUBC to capture carry.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
+ SDValue CA0 = SubC.getValue(1);
+
+ // t2 = A - B + CA0 using SUBE.
+ SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
+ SDValue CA1 = SubE1.getValue(1);
+
+ // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
+ SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
+
+ // Extract the first result and truncate to result type if needed
+ return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12722,6 +12753,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerADDSUBO_CARRY(Op, DAG);
+ case ISD::UCMP:
+ return LowerUCMP(Op, DAG);
}
}
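The UCMP lowering is a branch-free carry chain. Writing SUBC(x, y) for x - y with carry CA = (x >= y, unsigned), and SUBE(x, y, ca) for x - y - 1 + ca with the analogous carry-out, the final SUBE evaluates to exactly sign(A - B) in {-1, 0, 1}. A portable C++ model of the same arithmetic (a sketch of the math, not the emitted code):

    #include <cstdint>

    int64_t ucmpModel(uint64_t A, uint64_t B) {
      uint64_t Diff = A - B;                // subf
      unsigned CA0 = B >= A;                // carry of SUBC(B, A)
      uint64_t T = A - B - 1 + CA0;         // SUBE(A, B, CA0)
      unsigned CA1 = CA0 ? A >= B : A > B;  // carry of that SUBE
      return (int64_t)(Diff - T - 1 + CA1); // final SUBE: -1, 0, or 1
    }

Checking the three cases: A == B gives Diff = 0, T = 0, CA1 = 1, so 0; A > B gives T = Diff - 1, CA1 = 1, so 1; A < B gives T = Diff, CA1 = 0, so -1.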
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6694305..59f3387 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1318,6 +1318,7 @@ namespace llvm {
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 7a14929..66717b9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1367,9 +1367,8 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, (${rs1})",
class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT,
- ValueType vt2 = XLenVT>
- : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
ValueType vt = XLenVT>
@@ -1653,17 +1652,18 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
node:$falsev), [{}],
IntCCtoRISCVCC>;
-multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
+multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt,
+ ValueType cmpvt = XLenVT> {
let usesCustomInserter = 1 in
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
(ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
valty:$truev, valty:$falsev),
[(set valty:$dst,
- (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond,
+ (riscv_selectcc_frag:$cc (cmpvt GPR:$lhs), GPR:$rhs, cond,
(vt valty:$truev), valty:$falsev))]>;
// Explicitly select 0 in the condition to X0. The register coalescer doesn't
// always do it.
- def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond, (vt valty:$truev),
+ def : Pat<(riscv_selectcc_frag:$cc (cmpvt GPR:$lhs), 0, cond, (vt valty:$truev),
valty:$falsev),
(!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, (XLenVT X0),
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
@@ -1972,8 +1972,9 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs
/// Loads
-class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT>
- : Pat<(vt (LoadOp (AddrRegImm (XLenVT GPRMem:$rs1), simm12_lo:$imm12))),
+class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT,
+ ValueType PtrVT = XLenVT>
+ : Pat<(vt (LoadOp (AddrRegImm (PtrVT GPRMem:$rs1), simm12_lo:$imm12))),
(Inst GPRMem:$rs1, simm12_lo:$imm12)>;
def : LdPat<sextloadi8, LB>;
@@ -1987,8 +1988,8 @@ def : LdPat<zextloadi16, LHU>;
/// Stores
class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
- ValueType vt>
- : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (XLenVT GPRMem:$rs1),
+ ValueType vt, ValueType PtrVT = XLenVT>
+ : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (PtrVT GPRMem:$rs1),
simm12_lo:$imm12)),
(Inst StTy:$rs2, GPRMem:$rs1, simm12_lo:$imm12)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index b9510ef..afac37d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -59,9 +59,9 @@ def FPR64IN32X : RegisterOperand<GPRPair> {
def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>;
def ZdinxExt : ExtInfo<"_INX", "Zfinx", [HasStdExtZdinx, IsRV64],
- f64, FPR64INX, FPR32INX, FPR64INX, ?>;
+ f64, FPR64INX, FPR32INX, FPR64INX, ?, i64>;
def Zdinx32Ext : ExtInfo<"_IN32X", "ZdinxRV32Only", [HasStdExtZdinx, IsRV32],
- f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?>;
+ f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?, i32>;
defvar DExts = [DExt, ZdinxExt, Zdinx32Ext];
defvar DExtsRV64 = [DExt, ZdinxExt];
@@ -261,8 +261,10 @@ let Predicates = [HasStdExtZdinx, IsRV32] in {
/// Float conversion operations
// f64 -> f32, f32 -> f64
-def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1, FRM_RNE)>;
+def : Pat<(any_fpround FPR64IN32X:$rs1),
+ (FCVT_S_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>;
+def : Pat<(any_fpextend FPR32INX:$rs1),
+ (FCVT_D_S_IN32X FPR32INX:$rs1, (i32 FRM_RNE))>;
} // Predicates = [HasStdExtZdinx, IsRV32]
// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
@@ -321,7 +323,7 @@ def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>;
def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>;
-def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>;
+def : Pat<(i64 (riscv_fclass FPR64INX:$rs1)), (FCLASS_D_INX $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>;
def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_INX, FPR64INX, f64>;
@@ -354,41 +356,46 @@ def : Pat<(fneg (any_fma_nsz FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3)),
} // Predicates = [HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZdinx, IsRV32] in {
-def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
+def : Pat<(any_fsqrt FPR64IN32X:$rs1),
+ (FSQRT_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>;
def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>;
def : Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>;
-def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>;
+def : Pat<(i32 (riscv_fclass FPR64IN32X:$rs1)), (FCLASS_D_IN32X $rs1)>;
def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>;
def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_IN32X, FPR64IN32X, f64>;
def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)),
(FSGNJN_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2)>;
def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2),
- (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, FRM_RNE))>;
+ (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, (i32 FRM_RNE)))>;
def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2),
- (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>;
+ (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, (i32 FRM_DYN)))>;
// fmadd: rs1 * rs2 + rs3
def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3),
- (FMADD_D_IN32X $rs1, $rs2, $rs3, FRM_DYN)>;
+ (FMADD_D_IN32X $rs1, $rs2, $rs3, (i32 FRM_DYN))>;
// fmsub: rs1 * rs2 - rs3
def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)),
- (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+ (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3,
+ (i32 FRM_DYN))>;
// fnmsub: -rs1 * rs2 + rs3
def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, FPR64IN32X:$rs3),
- (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+ (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3,
+ (i32 FRM_DYN))>;
// fnmadd: -rs1 * rs2 - rs3
def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)),
- (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+ (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3,
+ (i32 FRM_DYN))>;
// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
def : Pat<(fneg (any_fma_nsz FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3)),
- (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+ (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3,
+ (i32 FRM_DYN))>;
} // Predicates = [HasStdExtZdinx, IsRV32]
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
@@ -441,42 +448,42 @@ def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D, f64>;
let Predicates = [HasStdExtZdinx, IsRV64] in {
// Match signaling FEQ_D
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETEQ)),
+def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs2, SETEQ)),
(AND (XLenVT (FLE_D_INX $rs1, $rs2)),
(XLenVT (FLE_D_INX $rs2, $rs1)))>;
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETOEQ)),
+def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs2, SETOEQ)),
(AND (XLenVT (FLE_D_INX $rs1, $rs2)),
(XLenVT (FLE_D_INX $rs2, $rs1)))>;
// If both operands are the same, use a single FLE.
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETEQ)),
+def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs1, SETEQ)),
(FLE_D_INX $rs1, $rs1)>;
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETOEQ)),
+def : Pat<(XLenVT (strict_fsetccs FPR64INX:$rs1, FPR64INX:$rs1, SETOEQ)),
(FLE_D_INX $rs1, $rs1)>;
-def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64>;
-def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64>;
-def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64>;
-def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64, i64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64, i64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64, i64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64, i64>;
} // Predicates = [HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZdinx, IsRV32] in {
// Match signaling FEQ_D
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs2, SETEQ)),
+def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETEQ)),
(AND (XLenVT (FLE_D_IN32X $rs1, $rs2)),
(XLenVT (FLE_D_IN32X $rs2, $rs1)))>;
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs2, SETOEQ)),
+def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETOEQ)),
(AND (XLenVT (FLE_D_IN32X $rs1, $rs2)),
(XLenVT (FLE_D_IN32X $rs2, $rs1)))>;
// If both operands are the same, use a single FLE.
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs1, SETEQ)),
+def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETEQ)),
(FLE_D_IN32X $rs1, $rs1)>;
-def : Pat<(XLenVT (strict_fsetccs (f64 FPR64IN32X:$rs1), FPR64IN32X:$rs1, SETOEQ)),
+def : Pat<(i32 (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETOEQ)),
(FLE_D_IN32X $rs1, $rs1)>;
-def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64>;
-def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64>;
-def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64>;
-def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64, i32>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64, i32>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64, i32>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64, i32>;
} // Predicates = [HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtD] in {
@@ -511,7 +518,7 @@ def SplitF64Pseudo
} // Predicates = [HasStdExtD, NoStdExtZfa, IsRV32]
let Predicates = [HasStdExtZdinx, IsRV64] in {
-defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64>;
+defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64, i64>;
def PseudoFROUND_D_INX : PseudoFROUND<FPR64INX, f64>;
@@ -523,9 +530,9 @@ def : StPat<store, SD, GPR, f64>;
} // Predicates = [HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZdinx, IsRV32] in {
-defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64>;
+defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64, i32>;
-def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>;
+def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64, i32>;
/// Loads
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
@@ -537,8 +544,8 @@ def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12_l
} // Predicates = [HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtZdinx, HasStdExtZilsd, IsRV32] in {
-def : LdPat<load, LD_RV32, f64>;
-def : StPat<store, SD_RV32, GPRPair, f64>;
+def : LdPat<load, LD_RV32, f64, i32>;
+def : StPat<store, SD_RV32, GPRPair, f64, i32>;
}
let Predicates = [HasStdExtD, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index fde030e..6571d99 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -131,7 +131,7 @@ def FPR32INX : RegisterOperand<GPRF32> {
// The DAGOperand can be unset if the predicates are not enough to define it.
class ExtInfo<string suffix, string space, list<Predicate> predicates,
ValueType primaryvt, DAGOperand primaryty, DAGOperand f32ty,
- DAGOperand f64ty, DAGOperand f16ty> {
+ DAGOperand f64ty, DAGOperand f16ty, ValueType intvt = XLenVT> {
list<Predicate> Predicates = predicates;
string Suffix = suffix;
string Space = space;
@@ -140,6 +140,7 @@ class ExtInfo<string suffix, string space, list<Predicate> predicates,
DAGOperand F32Ty = f32ty;
DAGOperand F64Ty = f64ty;
ValueType PrimaryVT = primaryvt;
+ ValueType IntVT = intvt;
}
def FExt : ExtInfo<"", "", [HasStdExtF], f32, FPR32, FPR32, ?, ?>;
@@ -314,9 +315,9 @@ multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.PrimaryTy, Commutable>;
}
-class PseudoFROUND<DAGOperand Ty, ValueType vt>
+class PseudoFROUND<DAGOperand Ty, ValueType vt, ValueType intvt = XLenVT>
: Pseudo<(outs Ty:$rd), (ins Ty:$rs1, Ty:$rs2, ixlenimm:$rm),
- [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, timm:$rm)))]> {
+ [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, (intvt timm:$rm))))]> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
@@ -529,13 +530,14 @@ def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
/// Generic pattern classes
class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond,
- RVInstCommon Inst, ValueType vt>
- : Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>;
+ RVInstCommon Inst, ValueType vt, ValueType intvt = XLenVT>
+ : Pat<(intvt (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>;
multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond,
RVInstCommon Inst, ExtInfo Ext> {
let Predicates = Ext.Predicates in
def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond,
- !cast<RVInstCommon>(Inst#Ext.Suffix), Ext.PrimaryVT>;
+ !cast<RVInstCommon>(Inst#Ext.Suffix),
+ Ext.PrimaryVT, Ext.IntVT>;
}
class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst,
@@ -549,14 +551,15 @@ multiclass PatFprFpr_m<SDPatternOperator OpNode, RVInstR Inst,
}
class PatFprFprDynFrm<SDPatternOperator OpNode, RVInstRFrm Inst,
- DAGOperand RegTy, ValueType vt>
- : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), (Inst $rs1, $rs2, FRM_DYN)>;
+ DAGOperand RegTy, ValueType vt, ValueType intvt>
+ : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)),
+ (Inst $rs1, $rs2,(intvt FRM_DYN))>;
multiclass PatFprFprDynFrm_m<SDPatternOperator OpNode, RVInstRFrm Inst,
ExtInfo Ext> {
let Predicates = Ext.Predicates in
def Ext.Suffix : PatFprFprDynFrm<OpNode,
!cast<RVInstRFrm>(Inst#Ext.Suffix),
- Ext.PrimaryTy, Ext.PrimaryVT>;
+ Ext.PrimaryTy, Ext.PrimaryVT, Ext.IntVT>;
}
/// Float conversion operations
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index d8f5d3e..aa8f1a1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -669,19 +669,19 @@ let Predicates = [HasVendorXCValu, IsRV32] in {
// Patterns for load & store operations
//===----------------------------------------------------------------------===//
class CVLdrrPat<PatFrag LoadOp, RVInst Inst>
- : Pat<(XLenVT (LoadOp CVrr:$regreg)),
+ : Pat<(i32 (LoadOp CVrr:$regreg)),
(Inst CVrr:$regreg)>;
class CVStriPat<PatFrag StoreOp, RVInst Inst>
- : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, simm12_lo:$imm12),
+ : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, simm12_lo:$imm12),
(Inst GPR:$rs2, GPR:$rs1, simm12_lo:$imm12)>;
class CVStrriPat<PatFrag StoreOp, RVInst Inst>
- : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, GPR:$rs3),
+ : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, GPR:$rs3),
(Inst GPR:$rs2, GPR:$rs1, GPR:$rs3)>;
class CVStrrPat<PatFrag StoreOp, RVInst Inst>
- : Pat<(StoreOp (XLenVT GPR:$rs2), CVrr:$regreg),
+ : Pat<(StoreOp (i32 GPR:$rs2), CVrr:$regreg),
(Inst GPR:$rs2, CVrr:$regreg)>;
let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in {
@@ -725,17 +725,17 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
(CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm),
(CV_LO5 cv_uimm10:$imm))>;
- def : PatGpr<cttz, CV_FF1>;
- def : PatGpr<ctlz, CV_FL1>;
+ def : PatGpr<cttz, CV_FF1, i32>;
+ def : PatGpr<ctlz, CV_FL1, i32>;
def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>;
- def : PatGpr<ctpop, CV_CNT>;
+ def : PatGpr<ctpop, CV_CNT, i32>;
- def : PatGprGpr<rotr, CV_ROR>;
+ def : PatGprGpr<rotr, CV_ROR, i32>;
def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts,
cv_tuimm2:$radix),
(CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
- def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
+ def : Pat<(bitreverse (i32 GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
}
class PatCoreVAluGpr<string intr, string asm> :
@@ -760,18 +760,18 @@ multiclass PatCoreVAluGprGprImm<Intrinsic intr> {
}
let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
- def : PatGpr<abs, CV_ABS>;
- def : PatGprGpr<setle, CV_SLE>;
- def : PatGprGpr<setule, CV_SLEU>;
- def : PatGprGpr<smin, CV_MIN>;
- def : PatGprGpr<umin, CV_MINU>;
- def : PatGprGpr<smax, CV_MAX>;
- def : PatGprGpr<umax, CV_MAXU>;
-
- def : Pat<(sext_inreg (XLenVT GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>;
- def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>;
- def : Pat<(and (XLenVT GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>;
- def : Pat<(and (XLenVT GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>;
+ def : PatGpr<abs, CV_ABS, i32>;
+ def : PatGprGpr<setle, CV_SLE, i32>;
+ def : PatGprGpr<setule, CV_SLEU, i32>;
+ def : PatGprGpr<smin, CV_MIN, i32>;
+ def : PatGprGpr<umin, CV_MINU, i32>;
+ def : PatGprGpr<smax, CV_MAX, i32>;
+ def : PatGprGpr<umax, CV_MAXU, i32>;
+
+ def : Pat<(sext_inreg (i32 GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>;
+ def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>;
+ def : Pat<(and (i32 GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>;
+ def : Pat<(and (i32 GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>;
defm CLIP : PatCoreVAluGprImm<int_riscv_cv_alu_clip>;
defm CLIPU : PatCoreVAluGprImm<int_riscv_cv_alu_clipu>;
@@ -790,9 +790,9 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
//===----------------------------------------------------------------------===//
let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in {
- def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETEQ, bb:$imm12),
+ def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETEQ, bb:$imm12),
(CV_BEQIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>;
- def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETNE, bb:$imm12),
+ def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETNE, bb:$imm12),
(CV_BNEIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>;
defm CC_SImm5_CV : SelectCC_GPR_riirr<GPR, simm5>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 5e1d07a..4537bfe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1648,10 +1648,10 @@ def : Pat<(qc_setwmi (i32 GPR:$rs3), GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb0
} // Predicates = [HasVendorXqcilsm, IsRV32]
let Predicates = [HasVendorXqcili, IsRV32] in {
-def: Pat<(qc_e_li tglobaladdr:$A), (QC_E_LI bare_simm32:$A)>;
-def: Pat<(qc_e_li tblockaddress:$A), (QC_E_LI bare_simm32:$A)>;
-def: Pat<(qc_e_li tjumptable:$A), (QC_E_LI bare_simm32:$A)>;
-def: Pat<(qc_e_li tconstpool:$A), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tglobaladdr:$A)), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tblockaddress:$A)), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tjumptable:$A)), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tconstpool:$A)), (QC_E_LI bare_simm32:$A)>;
} // Predicates = [HasVendorXqcili, IsRV32]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 014da99..c31713e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -69,16 +69,16 @@ def ZhinxminExt : ExtInfo<"_INX", "Zfinx",
f16, FPR16INX, FPR32INX, ?, FPR16INX>;
def ZhinxZdinxExt : ExtInfo<"_INX", "Zfinx",
[HasStdExtZhinx, HasStdExtZdinx, IsRV64],
- ?, ?, FPR32INX, FPR64INX, FPR16INX>;
+ ?, ?, FPR32INX, FPR64INX, FPR16INX, i64>;
def ZhinxminZdinxExt : ExtInfo<"_INX", "Zfinx",
[HasStdExtZhinxmin, HasStdExtZdinx, IsRV64],
- ?, ?, FPR32INX, FPR64INX, FPR16INX>;
+ ?, ?, FPR32INX, FPR64INX, FPR16INX, i64>;
def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32",
[HasStdExtZhinx, HasStdExtZdinx, IsRV32],
- ?, ?, FPR32INX, FPR64IN32X, FPR16INX>;
+ ?, ?, FPR32INX, FPR64IN32X, FPR16INX, i32>;
def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "ZdinxGPRPairRV32",
[HasStdExtZhinxmin, HasStdExtZdinx, IsRV32],
- ?, ?, FPR32INX, FPR64IN32X, FPR16INX>;
+ ?, ?, FPR32INX, FPR64IN32X, FPR16INX, i32>;
defvar ZfhExts = [ZfhExt, ZhinxExt];
defvar ZfhminExts = [ZfhminExt, ZhinxminExt];
@@ -607,13 +607,16 @@ def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs
let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
-def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>;
+def : Pat<(any_fpround FPR64IN32X:$rs1),
+ (FCVT_H_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>;
+def : Pat<(any_fpextend FPR16INX:$rs1),
+ (FCVT_D_H_IN32X FPR16INX:$rs1, (i32 FRM_RNE))>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2),
- (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>;
+ (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, (i32 FRM_DYN)))>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2),
+ (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, (i32 FRM_RNE)))>;
} // Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64] in {
diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt
index 46afe03..eab7b21 100644
--- a/llvm/lib/Target/SPIRV/CMakeLists.txt
+++ b/llvm/lib/Target/SPIRV/CMakeLists.txt
@@ -36,6 +36,7 @@ add_llvm_target(SPIRVCodeGen
SPIRVMetadata.cpp
SPIRVModuleAnalysis.cpp
SPIRVStructurizer.cpp
+ SPIRVCombinerHelper.cpp
SPIRVPreLegalizer.cpp
SPIRVPreLegalizerCombiner.cpp
SPIRVPostLegalizer.cpp
diff --git a/llvm/lib/Target/SPIRV/SPIRVCombine.td b/llvm/lib/Target/SPIRV/SPIRVCombine.td
index 6f726e0..fde56c4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCombine.td
+++ b/llvm/lib/Target/SPIRV/SPIRVCombine.td
@@ -11,8 +11,8 @@ include "llvm/Target/GlobalISel/Combine.td"
def vector_length_sub_to_distance_lowering : GICombineRule <
(defs root:$root),
(match (wip_match_opcode G_INTRINSIC):$root,
- [{ return matchLengthToDistance(*${root}, MRI); }]),
- (apply [{ applySPIRVDistance(*${root}, MRI, B); }])
+ [{ return Helper.matchLengthToDistance(*${root}); }]),
+ (apply [{ Helper.applySPIRVDistance(*${root}); }])
>;
def SPIRVPreLegalizerCombiner
diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp
new file mode 100644
index 0000000..267794c
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp
@@ -0,0 +1,60 @@
+//===-- SPIRVCombinerHelper.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVCombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+SPIRVCombinerHelper::SPIRVCombinerHelper(
+ GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
+ GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const SPIRVSubtarget &STI)
+ : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI) {}
+
+/// This match is part of a combine that
+/// rewrites length(X - Y) to distance(X, Y)
+/// (f32 (g_intrinsic length
+/// (g_fsub (vXf32 X) (vXf32 Y))))
+/// ->
+/// (f32 (g_intrinsic distance
+/// (vXf32 X) (vXf32 Y)))
+///
+bool SPIRVCombinerHelper::matchLengthToDistance(MachineInstr &MI) const {
+ if (MI.getOpcode() != TargetOpcode::G_INTRINSIC ||
+ cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::spv_length)
+ return false;
+
+ // First operand of MI is `G_INTRINSIC` so start at operand 2.
+ Register SubReg = MI.getOperand(2).getReg();
+ MachineInstr *SubInstr = MRI.getVRegDef(SubReg);
+ if (SubInstr->getOpcode() != TargetOpcode::G_FSUB)
+ return false;
+
+ return true;
+}
+
+void SPIRVCombinerHelper::applySPIRVDistance(MachineInstr &MI) const {
+ // Extract the operands for X and Y from the match criteria.
+ Register SubDestReg = MI.getOperand(2).getReg();
+ MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg);
+ Register SubOperand1 = SubInstr->getOperand(1).getReg();
+ Register SubOperand2 = SubInstr->getOperand(2).getReg();
+ Register ResultReg = MI.getOperand(0).getReg();
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildIntrinsic(Intrinsic::spv_distance, ResultReg)
+ .addUse(SubOperand1)
+ .addUse(SubOperand2);
+
+ MI.eraseFromParent();
+}
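Moving the match/apply pair into a CombinerHelper subclass is what lets the rule in SPIRVCombine.td invoke them as Helper.matchLengthToDistance / Helper.applySPIRVDistance, without free functions or explicit MRI and builder plumbing. The generated matcher effectively reduces to this shape for the rule (hypothetical driver; generated names differ):

    bool tryLengthToDistance(llvm::MachineInstr &Root,
                             const llvm::SPIRVCombinerHelper &Helper) {
      if (!Helper.matchLengthToDistance(Root))
        return false;
      Helper.applySPIRVDistance(Root); // length(X - Y) -> distance(X, Y)
      return true;
    }

Note one behavioral simplification versus the free-function version removed below: the helper only erases the spv_length call and leaves the now-dead G_FSUB for later dead-code elimination, instead of erasing it and its uses by hand.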
diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h
new file mode 100644
index 0000000..0b39d34
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.h
@@ -0,0 +1,38 @@
+//===-- SPIRVCombinerHelper.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This contains common combine transformations that may be used in a combine
+/// pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H
+
+#include "SPIRVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+
+namespace llvm {
+class SPIRVCombinerHelper : public CombinerHelper {
+protected:
+ const SPIRVSubtarget &STI;
+
+public:
+ using CombinerHelper::CombinerHelper;
+ SPIRVCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
+ bool IsPreLegalize, GISelValueTracking *VT,
+ MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const SPIRVSubtarget &STI);
+
+ bool matchLengthToDistance(MachineInstr &MI) const;
+ void applySPIRVDistance(MachineInstr &MI) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVCOMBINERHELPER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 989950f..a466ab2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -316,6 +316,9 @@ private:
bool selectImageWriteIntrinsic(MachineInstr &I) const;
bool selectResourceGetPointer(Register &ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectResourceNonUniformIndex(Register &ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const;
bool selectModf(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
bool selectUpdateCounter(Register &ResVReg, const SPIRVType *ResType,
@@ -347,7 +350,7 @@ private:
SPIRV::StorageClass::StorageClass SC,
uint32_t Set, uint32_t Binding,
uint32_t ArraySize, Register IndexReg,
- bool IsNonUniform, StringRef Name,
+ StringRef Name,
MachineIRBuilder MIRBuilder) const;
SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const;
bool extractSubvector(Register &ResVReg, const SPIRVType *ResType,
@@ -364,6 +367,7 @@ private:
MachineInstr &I) const;
bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType,
GIntrinsic &HandleDef, MachineInstr &Pos) const;
+ void decorateUsesAsNonUniform(Register &NonUniformReg) const;
};
bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) {
@@ -3465,6 +3469,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_discard: {
return selectDiscard(ResVReg, ResType, I);
}
+ case Intrinsic::spv_resource_nonuniformindex: {
+ return selectResourceNonUniformIndex(ResVReg, ResType, I);
+ }
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
@@ -3504,7 +3511,6 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding(
uint32_t Binding = getIConstVal(Intr.getOperand(3).getReg(), MRI);
uint32_t ArraySize = getIConstVal(MainHandleDef->getOperand(4).getReg(), MRI);
Register IndexReg = MainHandleDef->getOperand(5).getReg();
- const bool IsNonUniform = false;
std::string CounterName =
getStringValueFromReg(MainHandleDef->getOperand(6).getReg(), *MRI) +
".counter";
@@ -3513,7 +3519,7 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding(
MachineIRBuilder MIRBuilder(I);
Register CounterVarReg = buildPointerToResource(
GR.getPointeeType(ResType), GR.getPointerStorageClass(ResType), Set,
- Binding, ArraySize, IndexReg, IsNonUniform, CounterName, MIRBuilder);
+ Binding, ArraySize, IndexReg, CounterName, MIRBuilder);
return BuildCOPY(ResVReg, CounterVarReg, I);
}
@@ -3713,6 +3719,55 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(
.constrainAllUses(TII, TRI, RBI);
}
+bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
+ Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const {
+ Register ObjReg = I.getOperand(2).getReg();
+ if (!BuildCOPY(ResVReg, ObjReg, I))
+ return false;
+
+ buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {});
+ // Check for the registers that use the index marked as non-uniform
+ // and recursively mark them as non-uniform.
+ // Per the spec, it's necessary that the final argument used for
+ // load/store/sample/atomic must be decorated, so we need to propagate the
+ // decoration through access chains and copies.
+ // https://docs.vulkan.org/samples/latest/samples/extensions/descriptor_indexing/README.html#_when_to_use_non_uniform_indexing_qualifier
+ decorateUsesAsNonUniform(ResVReg);
+ return true;
+}
+
+void SPIRVInstructionSelector::decorateUsesAsNonUniform(
+ Register &NonUniformReg) const {
+ llvm::SmallVector<Register> WorkList = {NonUniformReg};
+ while (WorkList.size() > 0) {
+ Register CurrentReg = WorkList.back();
+ WorkList.pop_back();
+
+ bool IsDecorated = false;
+ for (MachineInstr &Use : MRI->use_instructions(CurrentReg)) {
+ if (Use.getOpcode() == SPIRV::OpDecorate &&
+ Use.getOperand(1).getImm() == SPIRV::Decoration::NonUniformEXT) {
+ IsDecorated = true;
+ continue;
+ }
+ // Check if the instruction has the result register and add it to the
+ // worklist.
+ if (Use.getOperand(0).isReg() && Use.getOperand(0).isDef()) {
+ Register ResultReg = Use.getOperand(0).getReg();
+ if (ResultReg == CurrentReg)
+ continue;
+ WorkList.push_back(ResultReg);
+ }
+ }
+
+ if (!IsDecorated) {
+ buildOpDecorate(CurrentReg, *MRI->getVRegDef(CurrentReg), TII,
+ SPIRV::Decoration::NonUniformEXT, {});
+ }
+ }
+ return;
+}
+
bool SPIRVInstructionSelector::extractSubvector(
Register &ResVReg, const SPIRVType *ResType, Register &ReadReg,
MachineInstr &InsertionPoint) const {
@@ -3784,7 +3839,7 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
Register SPIRVInstructionSelector::buildPointerToResource(
const SPIRVType *SpirvResType, SPIRV::StorageClass::StorageClass SC,
uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg,
- bool IsNonUniform, StringRef Name, MachineIRBuilder MIRBuilder) const {
+ StringRef Name, MachineIRBuilder MIRBuilder) const {
const Type *ResType = GR.getTypeForSPIRVType(SpirvResType);
if (ArraySize == 1) {
SPIRVType *PtrType =
@@ -3803,14 +3858,7 @@ Register SPIRVInstructionSelector::buildPointerToResource(
SPIRVType *ResPointerType =
GR.getOrCreateSPIRVPointerType(ResType, MIRBuilder, SC);
-
Register AcReg = MRI->createVirtualRegister(GR.getRegClass(ResPointerType));
- if (IsNonUniform) {
- // It is unclear which value needs to be marked an non-uniform, so both
- // the index and the access changed are decorated as non-uniform.
- buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
- buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
- }
MIRBuilder.buildInstr(SPIRV::OpAccessChain)
.addDef(AcReg)
@@ -4560,9 +4608,6 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
uint32_t Binding = foldImm(HandleDef.getOperand(3), MRI);
uint32_t ArraySize = foldImm(HandleDef.getOperand(4), MRI);
Register IndexReg = HandleDef.getOperand(5).getReg();
- // FIXME: The IsNonUniform flag needs to be set based on resource analysis.
- // https://github.com/llvm/llvm-project/issues/155701
- bool IsNonUniform = false;
std::string Name =
getStringValueFromReg(HandleDef.getOperand(6).getReg(), *MRI);
@@ -4576,13 +4621,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
SC = GR.getPointerStorageClass(ResType);
}
- Register VarReg =
- buildPointerToResource(VarType, SC, Set, Binding, ArraySize, IndexReg,
- IsNonUniform, Name, MIRBuilder);
-
- if (IsNonUniform)
- buildOpDecorate(HandleReg, HandleDef, TII, SPIRV::Decoration::NonUniformEXT,
- {});
+ Register VarReg = buildPointerToResource(VarType, SC, Set, Binding, ArraySize,
+ IndexReg, Name, MIRBuilder);
// The handle for the buffer is the pointer to the resource. For an image, the
// handle is the image object. So images get an extra load.
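The worklist walk above exists because, per the Vulkan guidance cited in the comment, NonUniformEXT must land on the value actually consumed by the load/store/sample/atomic, so the seed decoration has to follow def-use chains through access chains and copies. Its essential shape, reduced to a sketch (the real code also skips registers that already carry an OpDecorate NonUniformEXT):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/Register.h"

    template <typename DerivedDefsFn, typename DecorateFn>
    void propagateDecoration(llvm::Register Seed, DerivedDefsFn DerivedDefs,
                             DecorateFn DecorateOnce) {
      llvm::SmallVector<llvm::Register, 8> WorkList = {Seed};
      while (!WorkList.empty()) {
        llvm::Register Cur = WorkList.pop_back_val();
        DecorateOnce(Cur); // idempotent in the real code
        for (llvm::Register Def : DerivedDefs(Cur)) // values computed from Cur
          WorkList.push_back(Def);
      }
    }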
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp
index 8356751..48f4047 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizerCombiner.cpp
@@ -1,4 +1,3 @@
-
//===-- SPIRVPreLegalizerCombiner.cpp - combine legalization ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -13,24 +12,17 @@
//===----------------------------------------------------------------------===//
#include "SPIRV.h"
-#include "SPIRVTargetMachine.h"
+#include "SPIRVCombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
-#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
-#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IntrinsicsSPIRV.h"
#define GET_GICOMBINER_DEPS
#include "SPIRVGenPreLegalizeGICombiner.inc"
@@ -47,72 +39,9 @@ namespace {
#include "SPIRVGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-/// This match is part of a combine that
-/// rewrites length(X - Y) to distance(X, Y)
-/// (f32 (g_intrinsic length
-/// (g_fsub (vXf32 X) (vXf32 Y))))
-/// ->
-/// (f32 (g_intrinsic distance
-/// (vXf32 X) (vXf32 Y)))
-///
-bool matchLengthToDistance(MachineInstr &MI, MachineRegisterInfo &MRI) {
- if (MI.getOpcode() != TargetOpcode::G_INTRINSIC ||
- cast<GIntrinsic>(MI).getIntrinsicID() != Intrinsic::spv_length)
- return false;
-
- // First operand of MI is `G_INTRINSIC` so start at operand 2.
- Register SubReg = MI.getOperand(2).getReg();
- MachineInstr *SubInstr = MRI.getVRegDef(SubReg);
- if (!SubInstr || SubInstr->getOpcode() != TargetOpcode::G_FSUB)
- return false;
-
- return true;
-}
-void applySPIRVDistance(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) {
-
- // Extract the operands for X and Y from the match criteria.
- Register SubDestReg = MI.getOperand(2).getReg();
- MachineInstr *SubInstr = MRI.getVRegDef(SubDestReg);
- Register SubOperand1 = SubInstr->getOperand(1).getReg();
- Register SubOperand2 = SubInstr->getOperand(2).getReg();
-
- // Remove the original `spv_length` instruction.
-
- Register ResultReg = MI.getOperand(0).getReg();
- DebugLoc DL = MI.getDebugLoc();
- MachineBasicBlock &MBB = *MI.getParent();
- MachineBasicBlock::iterator InsertPt = MI.getIterator();
-
- // Build the `spv_distance` intrinsic.
- MachineInstrBuilder NewInstr =
- BuildMI(MBB, InsertPt, DL, B.getTII().get(TargetOpcode::G_INTRINSIC));
- NewInstr
- .addDef(ResultReg) // Result register
- .addIntrinsicID(Intrinsic::spv_distance) // Intrinsic ID
- .addUse(SubOperand1) // Operand X
- .addUse(SubOperand2); // Operand Y
-
- SPIRVGlobalRegistry *GR =
- MI.getMF()->getSubtarget<SPIRVSubtarget>().getSPIRVGlobalRegistry();
- auto RemoveAllUses = [&](Register Reg) {
- SmallVector<MachineInstr *, 4> UsesToErase(
- llvm::make_pointer_range(MRI.use_instructions(Reg)));
-
- // calling eraseFromParent to early invalidates the iterator.
- for (auto *MIToErase : UsesToErase) {
- GR->invalidateMachineInstr(MIToErase);
- MIToErase->eraseFromParent();
- }
- };
- RemoveAllUses(SubDestReg); // remove all uses of FSUB Result
- GR->invalidateMachineInstr(SubInstr);
- SubInstr->eraseFromParent(); // remove FSUB instruction
-}
-
class SPIRVPreLegalizerCombinerImpl : public Combiner {
protected:
- const CombinerHelper Helper;
+ const SPIRVCombinerHelper Helper;
const SPIRVPreLegalizerCombinerImplRuleConfig &RuleConfig;
const SPIRVSubtarget &STI;
@@ -147,7 +76,7 @@ SPIRVPreLegalizerCombinerImpl::SPIRVPreLegalizerCombinerImpl(
const SPIRVSubtarget &STI, MachineDominatorTree *MDT,
const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &VT, CSEInfo),
- Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI),
RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "SPIRVGenPreLegalizeGICombiner.inc"
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1cfcb1f..eea84a2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3633,7 +3633,7 @@ bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
}
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
- const SDNode *N, CombineLevel Level) const {
+ const SDNode *N) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
@@ -3648,7 +3648,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
// the fold for non-splats yet.
return N->getOperand(1) == N->getOperand(0).getOperand(1);
}
- return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
+ return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N);
}
bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b55556a..e28b9c1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1244,8 +1244,7 @@ namespace llvm {
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
const Value *Rhs) const override;
- bool shouldFoldConstantShiftPairToMask(const SDNode *N,
- CombineLevel Level) const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override;
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ac3626d..f021094 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -375,6 +375,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case MuslSF:
return "muslsf";
case MuslX32: return "muslx32";
+ case MuslWALI:
+ return "muslwali";
case Simulator: return "simulator";
case Pixel: return "pixel";
case Vertex: return "vertex";
@@ -767,6 +769,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("muslf32", Triple::MuslF32)
.StartsWith("muslsf", Triple::MuslSF)
.StartsWith("muslx32", Triple::MuslX32)
+ .StartsWith("muslwali", Triple::MuslWALI)
.StartsWith("musl", Triple::Musl)
.StartsWith("msvc", Triple::MSVC)
.StartsWith("itanium", Triple::Itanium)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8c8fc69..6b67b48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -544,8 +544,18 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
Swapped ? OOp : C, "", &SI);
- if (isa<FPMathOperator>(&SI))
- cast<Instruction>(NewSel)->setFastMathFlags(FMF);
+ if (isa<FPMathOperator>(&SI)) {
+ FastMathFlags NewSelFMF = FMF;
+ // We cannot propagate ninf from the original select, because OOp may be
+ // inf and the flag only guarantees that FalseVal (op OOp) is never
+ // infinity.
+ // Examples: -inf + +inf = NaN, -inf - -inf = NaN, 0 * inf = NaN
+ // However, if the original select has both ninf and nnan, we can
+ // safely propagate the flag.
+ NewSelFMF.setNoInfs(TVI->hasNoInfs() ||
+ (NewSelFMF.noInfs() && NewSelFMF.noNaNs()));
+ cast<Instruction>(NewSel)->setFastMathFlags(NewSelFMF);
+ }
NewSel->takeName(TVI);
BinaryOperator *BO =
BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
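The counterexamples in the new comment are plain IEEE-754 facts and can be checked outside the compiler; a standalone snippet (illustrative only, not part of the patch):

  #include <cstdio>
  #include <limits>

  int main() {
    const float Inf = std::numeric_limits<float>::infinity();
    // Each expression combines two infinities (or zero and infinity) into a
    // NaN, which is why ninf alone is not enough: OOp may itself be infinite
    // even though the original operation's result never is.
    std::printf("%f\n", -Inf + Inf);   // nan
    std::printf("%f\n", -Inf - -Inf);  // nan
    std::printf("%f\n", 0.0f * Inf);   // nan
  }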
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 45d3d49..b9d332b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2961,6 +2961,7 @@ public:
isa<FixedVectorType>(NewAI.getAllocatedType())
? cast<FixedVectorType>(NewAI.getAllocatedType())->getElementType()
: Type::getInt8Ty(NewAI.getContext());
+ unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
// Helper to check if a type is
// 1. A fixed vector type
@@ -2991,10 +2992,17 @@ public:
// Do not handle the case if
// 1. The store does not meet the conditions in the helper function
// 2. The store is volatile
+ // 3. The total store size is not a multiple of the allocated element
+ // type size
if (!IsTypeValidForTreeStructuredMerge(
SI->getValueOperand()->getType()) ||
SI->isVolatile())
return std::nullopt;
+ auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ if (NumElts * EltSize % AllocatedEltTySize != 0)
+ return std::nullopt;
StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
SI->getValueOperand());
} else {
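A concrete instance of the new guard, with illustrative numbers: a store of <3 x i16> carries 3 * 16 = 48 bits, so against an alloca whose element type is i32 the remainder 48 % 32 = 16 is nonzero, the store cannot be split on element boundaries, and the tree-structured merge bails out.

  #include <cstdio>

  int main() {
    // Hypothetical case: store of <3 x i16> into an alloca with i32 elements.
    unsigned NumElts = 3, EltSize = 16, AllocatedEltTySize = 32;
    bool Reject = (NumElts * EltSize) % AllocatedEltTySize != 0;
    std::printf("reject merge: %s\n", Reject ? "yes" : "no"); // yes
  }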
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 3ae570c..4f1ff7b 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -20,9 +20,8 @@
using namespace llvm;
-namespace {
-void nameInstructions(Function &F) {
- for (auto &Arg : F.args()) {
+static void nameInstructions(Function &F) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasName())
Arg.setName("arg");
}
@@ -38,8 +37,6 @@ void nameInstructions(Function &F) {
}
}
-} // namespace
-
PreservedAnalyses InstructionNamerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
nameInstructions(F);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e62d57e..50136a8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9348,13 +9348,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- // Ensure that the start values for all header phi recipes are updated before
- // vectorizing the epilogue loop.
VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
- // When vectorizing the epilogue loop, the canonical induction start
- // value needs to be changed from zero to the value after the main
- // vector loop. Find the resume value created during execution of the main
- // VPlan. It must be the first phi in the loop preheader.
+ // When vectorizing the epilogue loop, the canonical induction needs to be
+ // adjusted by the value after the main vector loop. Find the resume value
+ // created during execution of the main VPlan. It must be the first phi in the
+ // loop preheader. Use the value to increment the canonical IV, and update all
+ // users in the loop region to use the adjusted value.
// FIXME: Improve modeling for canonical IV start values in the epilogue
// loop.
using namespace llvm::PatternMatch;
@@ -9389,10 +9388,16 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+ IV->replaceAllUsesWith(Add);
+ Add->setOperand(0, IV);
DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
+ // Ensure that the start values for all header phi recipes are updated before
+ // vectorizing the epilogue loop. Skip the canonical IV, which has been
+ // handled above.
for (VPRecipeBase &R : drop_begin(Header->phis())) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
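One subtlety in the hunk above: replaceAllUsesWith rewrites every use of the canonical IV, including the use inside the freshly created Add itself, and the follow-up setOperand(0, IV) is what restores the intended IV-plus-resume-value form. The same three lines, annotated:

  VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
  IV->replaceAllUsesWith(Add); // also rewires Add's own first operand to Add
  Add->setOperand(0, IV);      // break the self-reference: Add = IV + VPV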
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cfa8d27..2388375 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2245,6 +2245,26 @@ public:
Align Alignment, const int64_t Diff, Value *Ptr0,
Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with run-time stride).
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ScalarTy type of loads.
+ /// \param CommonAlignment common alignment of loads as computed by
+ /// `computeCommonAlignment<LoadInst>`.
+ /// \param SortedIndices is a list of indices computed by this function such
+ /// that the sequence `PointerOps[SortedIndices[0]],
+ /// PointerOps[SortedIndices[1]], ..., PointerOps[SortedIndices[n]]` is
+ /// ordered by the coefficient of the stride. For example, if PointerOps is
+ /// `%base + %stride, %base, %base + 2 * %stride` the `SortedIndices` will be
+ /// `[1, 0, 2]`. We follow the convention that if `SortedIndices` would be
+ /// `0, 1, 2, 3, ...` we return an empty vector instead.
+ /// \param SPtrInfo If the function returns `true`, it also sets all the
+ /// fields of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const;
+
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
/// \param VL list of loads.
@@ -6875,6 +6895,24 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
}
+bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
+ Type *ScalarTy, Align CommonAlignment,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ StridedPtrInfo &SPtrInfo) const {
+ const unsigned Sz = PointerOps.size();
+ FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, Sz);
+ if (Sz <= MinProfitableStridedLoads || !TTI->isTypeLegal(StridedLoadTy) ||
+ !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+ return false;
+ if (const SCEV *Stride =
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, SortedIndices)) {
+ SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
+ SPtrInfo.StrideSCEV = Stride;
+ return true;
+ }
+ return false;
+}
+
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, StridedPtrInfo &SPtrInfo,
@@ -6915,15 +6953,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
auto *VecTy = getWidenedType(ScalarTy, Sz);
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (!IsSorted) {
- if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
- if (const SCEV *Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order);
- Stride && TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
- SPtrInfo.Ty = getWidenedType(ScalarTy, PointerOps.size());
- SPtrInfo.StrideSCEV = Stride;
- return LoadsState::StridedVectorize;
- }
- }
+ if (analyzeRtStrideCandidate(PointerOps, ScalarTy, CommonAlignment, Order,
+ SPtrInfo))
+ return LoadsState::StridedVectorize;
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment))
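Note the evaluation order in the new helper: the cheap size and TTI legality checks run before the comparatively expensive SCEV-based calculateRtStride walk, and the old `Stride && isLegalStridedLoadStore` conjunction is unfolded into early returns; the intent appears to be a pure refactor with no behavioral change.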
@@ -10632,7 +10664,9 @@ class InstructionsCompatibilityAnalysis {
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
BasicBlock *Parent = nullptr;
// Checks if the instruction has supported opcode.
- auto IsSupportedInstruction = [&](Instruction *I) {
+ auto IsSupportedInstruction = [&](Instruction *I, bool AnyUndef) {
+ if (AnyUndef && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+ return false;
return I && isSupportedOpcode(I->getOpcode()) &&
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
};
@@ -10640,10 +10674,13 @@ class InstructionsCompatibilityAnalysis {
// will be unable to schedule anyway.
SmallDenseSet<Value *, 8> Operands;
SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
+ bool AnyUndef = false;
for (Value *V : VL) {
auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!I) {
+ AnyUndef |= isa<UndefValue>(V);
continue;
+ }
if (!DT.isReachableFromEntry(I->getParent()))
continue;
if (Candidates.empty()) {
@@ -10678,7 +10715,7 @@ class InstructionsCompatibilityAnalysis {
if (P.second.size() < BestOpcodeNum)
continue;
for (Instruction *I : P.second) {
- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
+ if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) {
MainOp = I;
BestOpcodeNum = P.second.size();
break;
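The new AnyUndef guard makes an implicit assumption explicit: when the bundle contains undef lanes, promoting a division or a call to main opcode would materialize that operation for lanes with no real operands, which risks immediate undefined behavior for div/rem (an undef divisor, for instance) and stray side effects for calls, while plain arithmetic remains safe to speculate.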
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2555ebe..1fea068 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1777,6 +1777,9 @@ InstructionCost VPCostContext::getScalarizationOverhead(
if (VF.isScalar())
return 0;
+ assert(!VF.isScalable() &&
+ "Scalarization overhead not supported for scalable vectors");
+
InstructionCost ScalarizationCost = 0;
// Compute the cost of scalarizing the result if needed.
if (!ResultTy->isVoidTy()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 600ff8a..8e916772 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3174,6 +3174,9 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// transform, avoid computing their cost multiple times for now.
Ctx.SkipCostComputation.insert(UI);
+ if (VF.isScalable() && !isSingleScalar())
+ return InstructionCost::getInvalid();
+
switch (UI->getOpcode()) {
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because the cost of GEPs in
@@ -3221,9 +3224,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
return ScalarCallCost;
}
- if (VF.isScalable())
- return InstructionCost::getInvalid();
-
return ScalarCallCost * VF.getFixedValue() +
Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
}
@@ -3274,9 +3274,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
}
case Instruction::Load:
case Instruction::Store: {
- if (VF.isScalable() && !isSingleScalar())
- return InstructionCost::getInvalid();
-
// TODO: See getMemInstScalarizationCost for how to handle replicating and
// predicated cases.
const VPRegionBlock *ParentRegion = getParent()->getParent();
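Taken together with the hunk above, the scalable-VF bailout now guards every replicated, non-single-scalar recipe at the top of computeCost instead of only calls, loads, and stores; this pairs with the new assertion in VPCostContext::getScalarizationOverhead (earlier in this diff) that scalarization overhead is never queried for scalable vectors.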
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c8a2d84..7563cd7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1234,6 +1234,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (!Plan->isUnrolled())
return;
+ // Hoist an invariant increment Y of a phi X, by having X start at Y.
+ if (match(Def, m_c_Add(m_VPValue(X), m_VPValue(Y))) && Y->isLiveIn() &&
+ isa<VPPhi>(X)) {
+ auto *Phi = cast<VPPhi>(X);
+ if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
+ Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+ Phi->setOperand(0, Y);
+ Def->replaceAllUsesWith(Phi);
+ return;
+ }
+ }
+
// VPVectorPointer for part 0 can be replaced by their start pointer.
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
if (VecPtr->isFirstPart()) {
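The guarded pattern above reads: the phi starts at zero, the invariant add is its only user, and the add does not feed the phi's backedge; in that case the increment can be folded into the phi's start value. A loose scalar analogy (illustrative C++, not VPlan syntax; in the real pattern the backedge value does not go through the add):

  // Before: x starts at 0 and is only observed through x + step.
  int before(int step, int n) {
    int acc = 0;
    for (int i = 0, x = 0; i < n; ++i, ++x)
      acc += x + step; // the invariant increment
    return acc;
  }

  // After: the increment is hoisted into the start value.
  int after(int step, int n) {
    int acc = 0;
    for (int i = 0, x = step; i < n; ++i, ++x)
      acc += x; // former users of the add now read x directly
    return acc;
  }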