about summary refs log tree commit diff
path: root/llvm/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
author NAKAMURA Takumi <geek4civic@gmail.com> 2025-01-09 18:31:57 +0900
committer NAKAMURA Takumi <geek4civic@gmail.com> 2025-01-09 18:33:27 +0900
commit df025ebf872052c0761d44a3ef9b65e9675af8a8 (patch)
tree 9b4e94583e2536546d6606270bcdf846c95e1ba2 /llvm/lib/CodeGen/SelectionDAG
parent 4428c9d0b1344179f85a72e183a44796976521e3 (diff)
parent bdcf47e4bcb92889665825654bb80a8bbe30379e (diff)
download llvm-users/chapuni/cov/single/loop.zip
llvm-users/chapuni/cov/single/loop.tar.gz
llvm-users/chapuni/cov/single/loop.tar.bz2
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/loop
Conflicts: clang/lib/CodeGen/CoverageMappingGen.cpp
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 60
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp 77
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp 6
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp 6
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 15
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 3
7 files changed, 114 insertions, 55 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cbfef2..da3c834 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -141,7 +141,7 @@ static cl::opt<bool> EnableReduceLoadOpStoreWidth(
static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
"combiner-reduce-load-op-store-width-force-narrowing-profitable",
cl::Hidden, cl::init(false),
- cl::desc("DAG combiner force override the narrowing profitable check when"
+ cl::desc("DAG combiner force override the narrowing profitable check when "
"reducing the width of load/op/store sequences"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
@@ -3949,6 +3949,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
return Result;
+ // Similar to the previous rule, but this time targeting an expanded abs.
+ // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
+ // as well as
+ // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
+ // Note that these two are applicable to both signed and unsigned min/max.
+ SDValue X;
+ SDValue S0;
+ auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
+ if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
+ m_UMax(m_Value(X), NegPat),
+ m_SMin(m_Value(X), NegPat),
+ m_UMin(m_Value(X), NegPat))))) {
+ unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
+ if (hasOperation(NewOpc, VT))
+ return DAG.getNode(NewOpc, DL, VT, X, S0);
+ }
+
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
@@ -20438,10 +20455,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
- if (!VT.isFloatingPoint() ||
- VT != ST->getMemoryVT() ||
- LD->isNonTemporal() ||
- ST->isNonTemporal() ||
+ if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() || ST->isNonTemporal() ||
LD->getPointerInfo().getAddrSpace() != 0 ||
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
@@ -23088,8 +23103,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
+ // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
if (LegalTypes && BCSrc.getValueType().isInteger() &&
- BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ BCSrc.getScalarValueSizeInBits() ==
+ BCSrc.getOperand(0).getScalarValueSizeInBits()) {
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
// trunc i64 X to i32
SDValue X = BCSrc.getOperand(0);
@@ -24288,8 +24306,8 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
// Keep track of what we encounter.
- bool AnyInteger = false;
- bool AnyFP = false;
+ EVT AnyFPVT;
+
for (const SDValue &Op : N->ops()) {
if (ISD::BITCAST == Op.getOpcode() &&
!Op.getOperand(0).getValueType().isVector())
@@ -24303,27 +24321,23 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
// If it's neither, bail out, it could be something weird like x86mmx.
EVT LastOpVT = Ops.back().getValueType();
if (LastOpVT.isFloatingPoint())
- AnyFP = true;
- else if (LastOpVT.isInteger())
- AnyInteger = true;
- else
+ AnyFPVT = LastOpVT;
+ else if (!LastOpVT.isInteger())
return SDValue();
}
// If any of the operands is a floating point scalar bitcast to a vector,
// use floating point types throughout, and bitcast everything.
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
- if (AnyFP) {
- SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
- if (AnyInteger) {
- for (SDValue &Op : Ops) {
- if (Op.getValueType() == SVT)
- continue;
- if (Op.isUndef())
- Op = DAG.getNode(ISD::UNDEF, DL, SVT);
- else
- Op = DAG.getBitcast(SVT, Op);
- }
+ if (AnyFPVT != EVT()) {
+ SVT = AnyFPVT;
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.isUndef())
+ Op = DAG.getNode(ISD::UNDEF, DL, SVT);
+ else
+ Op = DAG.getBitcast(SVT, Op);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index db21e70..89a00c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -402,6 +402,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -1081,6 +1083,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::FMAXIMUM:
Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
return;
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
+ Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
+ return;
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1738,7 +1744,8 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
bool IsStrict = Node->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
- EVT VT = Src.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
SDLoc DL(Node);
// Attempt to expand using TargetLowering.
@@ -1752,11 +1759,11 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
}
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
+ if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
TargetLowering::Expand) ||
- (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
+ (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
TargetLowering::Expand)) ||
- TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
+ TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
if (IsStrict) {
UnrollStrictFPOp(Node, Results);
return;
@@ -1766,37 +1773,59 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
return;
}
- unsigned BW = VT.getScalarSizeInBits();
+ unsigned BW = SrcVT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
"Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
- SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
+ // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
+ // UINT_TO_FP with a larger float and round to the smaller type
+ if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) ||
+ (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) {
+ EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
+ SDValue UIToFP;
+ SDValue Result;
+ SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
+ EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT);
+ if (IsStrict) {
+ UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
+ {Node->getOperand(0), Src});
+ Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
+ {Node->getOperand(0), UIToFP, TargetZero});
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ } else {
+ UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
+ Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
+ Results.push_back(Result);
+ }
+
+ return;
+ }
+
+ SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);
// Constants to clear the upper part of the word.
// Notice that we can also use SHL+SHR, but using a constant is slightly
// faster on x86.
uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
- SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
+ SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);
// Two to the power of half-word-size.
- SDValue TWOHW =
- DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
+ SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);
// Clear upper part of LO, lower HI
- SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
+ SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);
if (IsStrict) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
- SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
- {Node->getValueType(0), MVT::Other},
+ SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
{Node->getOperand(0), HI});
- fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
+ fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
{fHI.getValue(1), fHI, TWOHW});
- SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
- {Node->getValueType(0), MVT::Other},
+ SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
{Node->getOperand(0), LO});
SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
@@ -1804,8 +1833,7 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
// Add the two halves
SDValue Result =
- DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
- {TF, fHI, fLO});
+ DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});
Results.push_back(Result);
Results.push_back(Result.getValue(1));
@@ -1815,13 +1843,12 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
- SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
- fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
- SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);
// Add the two halves
- Results.push_back(
- DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
+ Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
}
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
@@ -2246,11 +2273,13 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
DAG.getVectorIdxConstant(i, dl));
SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
DAG.getVectorIdxConstant(i, dl));
+ // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
Ops[i] = DAG.getNode(ISD::SETCC, dl,
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
DAG.getConstant(0, dl, EltVT));
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107454a..780eba1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -149,6 +149,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::FLDEXP:
case ISD::ABDS:
case ISD::ABDU:
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 9e5867c..51ee3cc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -125,9 +125,9 @@ static cl::opt<int> MaxReorderWindow(
cl::desc("Number of instructions to allow ahead of the critical path "
"in sched=list-ilp"));
-static cl::opt<unsigned> AvgIPC(
- "sched-avg-ipc", cl::Hidden, cl::init(1),
- cl::desc("Average inst/cycle whan no target itinerary exists."));
+static cl::opt<unsigned>
+ AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle when no target itinerary exists."));
namespace {
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 26fc75c..dff7243 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -43,9 +43,9 @@ STATISTIC(LoadsClustered, "Number of loads clustered together");
// without a target itinerary. The choice of number here has more to do with
// balancing scheduler heuristics than with the actual machine latency.
static cl::opt<int> HighLatencyCycles(
- "sched-high-latency-cycles", cl::Hidden, cl::init(10),
- cl::desc("Roughly estimate the number of cycles that 'long latency'"
- "instructions take for targets with no itinerary"));
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency' "
+ "instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
: ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 10e8ba9..0dfd030 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -430,6 +430,21 @@ bool ISD::matchBinaryPredicate(
return true;
}
+ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) {
+ switch (MinMaxOpc) {
+ default:
+ llvm_unreachable("unrecognized opcode");
+ case ISD::UMIN:
+ return ISD::UMAX;
+ case ISD::UMAX:
+ return ISD::UMIN;
+ case ISD::SMIN:
+ return ISD::SMAX;
+ case ISD::SMAX:
+ return ISD::SMIN;
+ }
+}
+
ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
switch (VecReduceOpcode) {
default:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e87d809..9f57884 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8435,7 +8435,6 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return false;
SDLoc dl(SDValue(Node, 0));
- EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler_rt. This implementation performs rounding
@@ -8448,7 +8447,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
- SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+ SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);