Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--   llvm/lib/Target/X86/X86FixupSetCC.cpp   |  6
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp | 67
-rw-r--r--   llvm/lib/Target/X86/X86LowerAMXType.cpp | 30
3 files changed, 92 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 2de89947..ea93a57 100644
--- a/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -136,6 +136,12 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
.addReg(ZeroReg)
.addReg(Reg0)
.addImm(X86::sub_8bit);
+
+ // Redirect the debug-instr-number to the setcc.
+ if (unsigned InstrNum = ZExt->peekDebugInstrNum())
+ MF.makeDebugValueSubstitution({InstrNum, 0},
+ {MI.getDebugInstrNum(), 0});
+
ToErase.push_back(ZExt);
}
}
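
Context for the new substitution: with instruction referencing, the zero-extend may own a debug instruction number that DBG_INSTR_REF users point at, so erasing it would leave those references dangling; the call above redirects them to the SETCC that now defines the value. Below is a minimal standalone sketch of that bookkeeping, using a plain std::map as a stand-in for the per-function substitution table; the resolve() helper and all names are illustrative, not LLVM API.

    #include <cassert>
    #include <map>
    #include <utility>

    // (debug instruction number, operand index), as referenced by DBG_INSTR_REFs.
    using InstrOp = std::pair<unsigned, unsigned>;

    // Toy stand-in for the per-function debug value substitution table.
    std::map<InstrOp, InstrOp> Substitutions;

    // Follow recorded substitutions until we reach a live instruction number.
    InstrOp resolve(InstrOp Ref) {
      for (auto It = Substitutions.find(Ref); It != Substitutions.end();
           It = Substitutions.find(Ref))
        Ref = It->second;
      return Ref;
    }

    int main() {
      const unsigned ZExtNum = 7;  // number owned by the zext being erased
      const unsigned SetCCNum = 3; // number of the setcc defining the value
      // What the patch records before ToErase deletes the zext.
      Substitutions[{ZExtNum, 0}] = {SetCCNum, 0};
      // A debug user that referred to the zext now resolves to the setcc.
      assert((resolve({ZExtNum, 0}) == InstrOp{SetCCNum, 0}));
      return 0;
    }
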
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cd04ff5..3802506 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44615,8 +44615,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
APInt DemandedMask = OriginalDemandedBits << ShAmt;
- // If we just want the sign bit then we don't need to shift it.
- if (OriginalDemandedBits.isSignMask())
+ // If we only want bits that already match the signbit then we don't need
+ // to shift.
+ unsigned NumHiDemandedBits = BitWidth - OriginalDemandedBits.countr_zero();
+ if (TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1) >=
+ NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
// fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
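
Why the stronger test is safe: the demanded result bits all sit at or above bit countr_zero(OriginalDemandedBits), and if Op0 already has at least that many sign bits, an arithmetic shift right cannot change any of them, so the shift can be dropped. A scalar brute-force check of that identity is sketched below (C++20 for std::countr_zero; the 8-bit width, the 0xF0 demanded mask and the [-16, 15] clamp stand in for ComputeNumSignBits(Op0) >= NumHiDemandedBits and are example choices, not taken from the patch).

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Scalar model of the VSRAI fold: if every demanded result bit lies in a
    // position where the source already equals its sign bit, the arithmetic
    // shift is a no-op on those bits.
    int main() {
      const uint8_t DemandedBits = 0xF0; // only the top four bits are wanted
      const unsigned BitWidth = 8;
      const unsigned NumHiDemandedBits =
          BitWidth - std::countr_zero(DemandedBits);
      assert(NumHiDemandedBits == 4);

      for (int V = -128; V <= 127; ++V) {
        // Keep only values with at least NumHiDemandedBits sign bits, i.e.
        // bits 7..4 are all copies of the sign: V in [-16, 15].
        if (V < -16 || V > 15)
          continue;
        for (unsigned ShAmt = 0; ShAmt != BitWidth; ++ShAmt) {
          int8_t Src = static_cast<int8_t>(V);
          int8_t Shifted = static_cast<int8_t>(Src >> ShAmt); // arithmetic
          assert((Shifted & DemandedBits) == (Src & DemandedBits));
        }
      }
      return 0;
    }
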
@@ -45169,6 +45172,18 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
return true;
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: {
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(Op.getSimpleValueType(), DemandedElts, DemandedLHS,
+ DemandedRHS);
+ return (!DemandedLHS ||
+ DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS,
+ PoisonOnly, Depth + 1)) &&
+ (!DemandedRHS ||
+ DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS,
+ PoisonOnly, Depth + 1));
+ }
case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
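
For reference, getPackDemandedElts maps each demanded result element back onto the input it comes from: within every 128-bit lane, the low half of the results is produced from the LHS and the high half from the RHS. That split is what lets the code above skip an operand entirely when !DemandedLHS or !DemandedRHS. A toy model of the mapping for a 256-bit PACKSSWB (two v16i16 inputs, v32i8 result) is sketched below; it is an illustration of the layout, not the LLVM helper.

    #include <cassert>
    #include <cstdint>

    // Map a demanded-result mask of a 256-bit pack (32 x i8, two 128-bit
    // lanes, 8 elements per input per lane) onto the two 16-element inputs.
    void packDemandedElts(uint32_t DemandedResult, uint16_t &DemandedLHS,
                          uint16_t &DemandedRHS) {
      DemandedLHS = DemandedRHS = 0;
      const unsigned NumLanes = 2, EltsPerLane = 16, HalfPerLane = 8;
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
        for (unsigned Elt = 0; Elt != EltsPerLane; ++Elt)
          if (DemandedResult & (1u << (Lane * EltsPerLane + Elt))) {
            if (Elt < HalfPerLane) // low half of the lane comes from the LHS
              DemandedLHS |= 1u << (Lane * HalfPerLane + Elt);
            else                   // high half of the lane comes from the RHS
              DemandedRHS |= 1u << (Lane * HalfPerLane + Elt - HalfPerLane);
          }
    }

    int main() {
      uint16_t L, R;
      // Result element 9 maps to RHS element 1 (lane 0); result element 20
      // maps to LHS element 12 (lane 1, element 4).
      packDemandedElts((1u << 9) | (1u << 20), L, R);
      assert(L == (1u << 12) && R == (1u << 1));
      return 0;
    }
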
@@ -45239,6 +45254,10 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case X86ISD::BLENDI:
case X86ISD::BLENDV:
return false;
+ // SSE packs.
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS:
+ return false;
// SSE target shuffles.
case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
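
Marking the packs as unable to create undef or poison is justified by their per-element semantics: each output is a saturating truncation of exactly one input element, a total and deterministic function, so poison can only propagate through the node, never originate in it. A scalar model of the PACKSS rule follows (PACKUS would clamp to [0, 255] instead); this is an illustration, not the intrinsic itself.

    #include <cassert>
    #include <cstdint>

    // PACKSSWB per-element semantics: saturate an i16 to the signed i8 range.
    int8_t packss(int16_t V) {
      if (V > INT8_MAX)
        return INT8_MAX;
      if (V < INT8_MIN)
        return INT8_MIN;
      return static_cast<int8_t>(V);
    }

    int main() {
      assert(packss(300) == 127);   // saturates high
      assert(packss(-300) == -128); // saturates low
      assert(packss(42) == 42);     // in-range values pass through
      return 0;
    }
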
@@ -45438,7 +45457,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
const SDLoc &DL,
const X86Subtarget &Subtarget) {
EVT SrcVT = Src.getValueType();
- if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
+ if (Subtarget.useSoftFloat() || !SrcVT.isSimple() ||
+ SrcVT.getScalarType() != MVT::i1)
return SDValue();
// Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
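
The new bail-out matters because this combine forms MOVMSK, which packs the i1 elements (in practice the sign bits of the source lanes) into a single scalar and only exists as an SSE-register instruction, so it has no lowering when soft-float disables vector registers. A scalar model of the byte variant is sketched below; it illustrates the semantics only, not the DAG combine.

    #include <cassert>
    #include <cstdint>

    // What pmovmskb computes: one mask bit per lane, taken from the lane's
    // sign bit (the i1 element after a vector compare).
    uint8_t movmskb(const int8_t Lanes[8]) {
      uint8_t Mask = 0;
      for (unsigned I = 0; I != 8; ++I)
        if (Lanes[I] < 0)
          Mask |= static_cast<uint8_t>(1u << I);
      return Mask;
    }

    int main() {
      const int8_t Lanes[8] = {-1, 0, -1, 0, 0, 0, 0, -1};
      assert(movmskb(Lanes) == 0x85); // bits 0, 2 and 7 set
      return 0;
    }
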
@@ -52369,16 +52389,41 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub =
- DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ // If EFLAGS comes from a CMP, convert COND_A into COND_B the same way: build
+ // a CMP with the operands swapped and consume its carry flag with SBB/ADC.
+ if (EFLAGS.getOpcode() == X86ISD::CMP && EFLAGS.getNode()->hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, SDLoc(EFLAGS), MVT::i32,
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(0, DL, VT), NewEFLAGS);
+ DAG.getConstant(0, DL, VT), NewCmp);
+ }
+
+ if (EFLAGS.getOpcode() == X86ISD::SUB &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ // Only create the swapped SUB once we know a fold will fire; an unused
+ // temporary node would otherwise linger in the DAG and could pessimize
+ // hasOneUse checks in later combines.
+ if (EFLAGS.getNode()->hasOneUse()) {
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
+ }
+
+ // If X is the value produced by the SUB itself, rebuild the whole
+ // expression as SBB A, B, carry(B - A): A - B - (A > B) == X - SETA.
+ if (IsSub && X == EFLAGS.getValue(0)) {
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(X86ISD::SBB, DL, DAG.getVTList(VT, MVT::i32),
+ EFLAGS.getOperand(0), EFLAGS.getOperand(1),
+ NewEFLAGS);
+ }
}
}
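
All of these folds rest on one unsigned identity: the carry of the swapped subtraction b - a is set exactly when a > b, so a COND_A test can always be consumed through SBB (or ADC for additions) once the operands are flipped. An exhaustive 8-bit check of the two subtracted forms is sketched below, using a scalar model of SBB rather than the DAG nodes.

    #include <cassert>
    #include <cstdint>

    // Model of x86 SBB: dst = a - b - cf, where cf is the incoming carry flag.
    uint8_t sbb(uint8_t A, uint8_t B, bool CF) {
      return static_cast<uint8_t>(A - B - (CF ? 1 : 0));
    }

    int main() {
      for (unsigned X = 0; X != 256; ++X)
        for (unsigned A = 0; A != 256; ++A)
          for (unsigned B = 0; B != 256; ++B) {
            // Carry of the swapped subtraction B - A equals the A > B test.
            bool CF = B < A;
            // X - (A > B)  ==  SBB X, 0 consuming the swapped carry.
            assert(sbb(uint8_t(X), 0, CF) == uint8_t(X - (A > B ? 1u : 0u)));
            // When X is the SUB A - B itself, the whole expression is simply
            // SBB A, B consuming the swapped carry.
            assert(sbb(uint8_t(A), uint8_t(B), CF) ==
                   uint8_t((A - B) - (A > B ? 1u : 0u)));
          }
      return 0;
    }
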
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 278ae46..0ba71ad 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -854,6 +854,7 @@ public:
: Func(F), SC(ShapeC), DT(nullptr) {}
bool combineCastStore(IntrinsicInst *Cast, StoreInst *ST);
bool combineLoadCast(IntrinsicInst *Cast, LoadInst *LD);
+ bool combineTilezero(IntrinsicInst *Cast);
bool combineLdSt(SmallVectorImpl<Instruction *> &Casts);
bool combineAMXcast(TargetLibraryInfo *TLI);
bool transformAMXCast(IntrinsicInst *AMXCast);
@@ -1175,6 +1176,26 @@ bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
return EraseLoad;
}
+// %19 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> zeroinitializer)
+// -->
+// %19 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col)
+bool X86LowerAMXCast::combineTilezero(IntrinsicInst *Cast) {
+ Value *Row = nullptr, *Col = nullptr;
+ Use &U = *(Cast->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ auto *II = cast<IntrinsicInst>(U.getUser());
+ if (!isAMXIntrinsic(II))
+ return false;
+
+ std::tie(Row, Col) = SC->getShape(II, OpNo);
+
+ IRBuilder<> Builder(Cast);
+ Value *NewInst =
+ Builder.CreateIntrinsic(Intrinsic::x86_tilezero_internal, {}, {Row, Col});
+ Cast->replaceAllUsesWith(NewInst);
+ return true;
+}
+
bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
bool Change = false;
for (auto *Cast : Casts) {
@@ -1198,6 +1219,14 @@ bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
for (auto *Store : DeadStores)
Store->eraseFromParent();
} else { // x86_cast_vector_to_tile
+ // %19 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> zeroinitializer)
+ // -->
+ // %19 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col)
+ if (isa<ConstantAggregateZero>(Cast->getOperand(0))) {
+ Change |= combineTilezero(cast<IntrinsicInst>(Cast));
+ continue;
+ }
+
auto *Load = dyn_cast<LoadInst>(Cast->getOperand(0));
if (!Load || !Load->hasOneUse())
continue;
@@ -1210,6 +1239,7 @@ bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
// Set the operand to null so that the load instruction can be erased.
Cast->setOperand(0, nullptr);
Load->eraseFromParent();
+ Change = true;
}
}
}
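
One detail worth spelling out: the row and column for the new tilezero cannot come from the cast itself, since a zeroinitializer carries no shape, so combineTilezero recovers them from the AMX intrinsic that consumes the tile via SC->getShape(II, OpNo). The sketch below is a toy model of that lookup-through-the-use; the struct names and the tdpbssd-like 16x64 shape are illustrative, not LLVM API.

    #include <cassert>
    #include <utility>

    // A consumer intrinsic knows the shape it expects at each tile operand,
    // mirroring what SC->getShape(II, OpNo) provides in the patch above.
    struct TileUser {
      unsigned Rows[3];
      unsigned Cols[3];
      std::pair<unsigned, unsigned> shapeOfOperand(unsigned OpNo) const {
        return {Rows[OpNo], Cols[OpNo]};
      }
    };

    struct Tile {
      bool IsZero = false;
      unsigned Row = 0, Col = 0;
    };

    // Rewrite "cast of zeroinitializer" into an explicit zero tile with the
    // shape demanded by its user, the analogue of emitting tilezero.internal.
    Tile makeZeroTile(const TileUser &User, unsigned OpNo) {
      auto [Row, Col] = User.shapeOfOperand(OpNo);
      return Tile{/*IsZero=*/true, Row, Col};
    }

    int main() {
      // A tdpbssd-style consumer whose operand 1 must be a 16x64 tile.
      TileUser MatMul{{16, 16, 16}, {16, 64, 64}};
      Tile Zero = makeZeroTile(MatMul, /*OpNo=*/1);
      assert(Zero.IsZero && Zero.Row == 16 && Zero.Col == 64);
      return 0;
    }

The practical effect of the combine is that the zero tile is produced directly in a tile register rather than being round-tripped through a 1024-byte <256 x i32> zero vector.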