about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 256
1 files changed, 52 insertions, 204 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 49beada..007074c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27946,67 +27946,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
- case Intrinsic::x86_t2rpntlvwz0rs_internal:
- case Intrinsic::x86_t2rpntlvwz0rst1_internal:
- case Intrinsic::x86_t2rpntlvwz1rs_internal:
- case Intrinsic::x86_t2rpntlvwz1rst1_internal:
- case Intrinsic::x86_t2rpntlvwz0_internal:
- case Intrinsic::x86_t2rpntlvwz0t1_internal:
- case Intrinsic::x86_t2rpntlvwz1_internal:
- case Intrinsic::x86_t2rpntlvwz1t1_internal: {
- auto *X86MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
- X86MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
- unsigned IntNo = Op.getConstantOperandVal(1);
- unsigned Opc = 0;
- switch (IntNo) {
- default:
- llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_t2rpntlvwz0_internal:
- Opc = X86::PT2RPNTLVWZ0V;
- break;
- case Intrinsic::x86_t2rpntlvwz0t1_internal:
- Opc = X86::PT2RPNTLVWZ0T1V;
- break;
- case Intrinsic::x86_t2rpntlvwz1_internal:
- Opc = X86::PT2RPNTLVWZ1V;
- break;
- case Intrinsic::x86_t2rpntlvwz1t1_internal:
- Opc = X86::PT2RPNTLVWZ1T1V;
- break;
- case Intrinsic::x86_t2rpntlvwz0rs_internal:
- Opc = X86::PT2RPNTLVWZ0RSV;
- break;
- case Intrinsic::x86_t2rpntlvwz0rst1_internal:
- Opc = X86::PT2RPNTLVWZ0RST1V;
- break;
- case Intrinsic::x86_t2rpntlvwz1rs_internal:
- Opc = X86::PT2RPNTLVWZ1RSV;
- break;
- case Intrinsic::x86_t2rpntlvwz1rst1_internal:
- Opc = X86::PT2RPNTLVWZ1RST1V;
- break;
- }
-
- SDLoc DL(Op);
- SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
-
- SDValue Ops[] = {Op.getOperand(2), // Row
- Op.getOperand(3), // Col0
- Op.getOperand(4), // Col1
- Op.getOperand(5), // Base
- DAG.getTargetConstant(1, DL, MVT::i8), // Scale
- Op.getOperand(6), // Index
- DAG.getTargetConstant(0, DL, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i16), // Segment
- Op.getOperand(0)}; // Chain
-
- MachineSDNode *Res = DAG.getMachineNode(Opc, DL, VTs, Ops);
- SDValue Res0 = DAG.getTargetExtractSubreg(X86::sub_t0, DL, MVT::x86amx,
- SDValue(Res, 0));
- SDValue Res1 = DAG.getTargetExtractSubreg(X86::sub_t1, DL, MVT::x86amx,
- SDValue(Res, 0));
- return DAG.getMergeValues({Res0, Res1, SDValue(Res, 1)}, DL);
- }
case Intrinsic::x86_atomic_bts_rm:
case Intrinsic::x86_atomic_btc_rm:
case Intrinsic::x86_atomic_btr_rm: {
@@ -37745,10 +37684,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert (Imm < 8 && "Illegal tmm index");
return X86::TMM0 + Imm;
};
- auto TMMImmToTMMPair = [](unsigned Imm) {
- assert(Imm < 8 && "Illegal tmm pair index.");
- return X86::TMM0_TMM1 + Imm / 2;
- };
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");
@@ -38129,53 +38064,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPBHF8PS:
case X86::PTDPHBF8PS:
case X86::PTDPHF8PS:
- case X86::PTTDPBF16PS:
- case X86::PTTDPFP16PS:
- case X86::PTTCMMIMFP16PS:
- case X86::PTTCMMRLFP16PS:
- case X86::PTCONJTCMMIMFP16PS:
- case X86::PTMMULTF32PS:
- case X86::PTTMMULTF32PS: {
+ case X86::PTMMULTF32PS: {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
+ // clang-format off
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
- case X86::PTCMMIMFP16PS:
- Opc = X86::TCMMIMFP16PS;
- break;
- case X86::PTCMMRLFP16PS:
- Opc = X86::TCMMRLFP16PS;
- break;
+ case X86::PTCMMIMFP16PS: Opc = X86::TCMMIMFP16PS; break;
+ case X86::PTCMMRLFP16PS: Opc = X86::TCMMRLFP16PS; break;
case X86::PTDPBF8PS: Opc = X86::TDPBF8PS; break;
case X86::PTDPBHF8PS: Opc = X86::TDPBHF8PS; break;
case X86::PTDPHBF8PS: Opc = X86::TDPHBF8PS; break;
case X86::PTDPHF8PS: Opc = X86::TDPHF8PS; break;
- case X86::PTTDPBF16PS:
- Opc = X86::TTDPBF16PS;
- break;
- case X86::PTTDPFP16PS:
- Opc = X86::TTDPFP16PS;
- break;
- case X86::PTTCMMIMFP16PS:
- Opc = X86::TTCMMIMFP16PS;
- break;
- case X86::PTTCMMRLFP16PS:
- Opc = X86::TTCMMRLFP16PS;
- break;
- case X86::PTCONJTCMMIMFP16PS:
- Opc = X86::TCONJTCMMIMFP16PS;
- break;
- case X86::PTMMULTF32PS:
- Opc = X86::TMMULTF32PS;
- break;
- case X86::PTTMMULTF32PS:
- Opc = X86::TTMMULTF32PS;
- break;
+ case X86::PTMMULTF32PS: Opc = X86::TMMULTF32PS; break;
+ // clang-format on
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
@@ -38246,70 +38153,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
- case X86::PT2RPNTLVWZ0:
- case X86::PT2RPNTLVWZ0T1:
- case X86::PT2RPNTLVWZ1:
- case X86::PT2RPNTLVWZ1T1:
- case X86::PT2RPNTLVWZ0RS:
- case X86::PT2RPNTLVWZ0RST1:
- case X86::PT2RPNTLVWZ1RS:
- case X86::PT2RPNTLVWZ1RST1: {
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Opc;
-#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction!");
- case X86::PT2RPNTLVWZ0:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
- break;
- case X86::PT2RPNTLVWZ0T1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
- break;
- case X86::PT2RPNTLVWZ1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
- break;
- case X86::PT2RPNTLVWZ1T1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
- break;
- case X86::PT2RPNTLVWZ0RS:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
- break;
- case X86::PT2RPNTLVWZ0RST1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
- break;
- case X86::PT2RPNTLVWZ1RS:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
- break;
- case X86::PT2RPNTLVWZ1RST1:
- Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
- break;
- }
-#undef GET_EGPR_IF_ENABLED
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
- MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
-
- MIB.add(MI.getOperand(1)); // base
- MIB.add(MI.getOperand(2)); // scale
- MIB.add(MI.getOperand(3)); // index
- MIB.add(MI.getOperand(4)); // displacement
- MIB.add(MI.getOperand(5)); // segment
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
- case X86::PTTRANSPOSED:
- case X86::PTCONJTFP16: {
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Opc = MI.getOpcode() == X86::PTTRANSPOSED ? X86::TTRANSPOSED
- : X86::TCONJTFP16;
-
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
- MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
- MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
case X86::PTCVTROWPS2BF16Hrri:
case X86::PTCVTROWPS2BF16Lrri:
case X86::PTCVTROWPS2PHHrri:
@@ -53502,7 +53345,8 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
+// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single
+// i32 sub value.
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -53528,14 +53372,20 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
// BTR: X & ~(1 << ShAmt)
// BTS: X | (1 << ShAmt)
// BTC: X ^ (1 << ShAmt)
- SDValue ShAmt;
+ //
+ // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
+ SDValue InsertBit, ShAmt;
if (!StoredVal.hasOneUse() ||
!(sd_match(StoredVal, m_And(m_Specific(LoadVal),
m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
sd_match(StoredVal,
m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
sd_match(StoredVal,
- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
+ m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(StoredVal,
+ m_Or(m_And(m_Specific(LoadVal),
+ m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
+ m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
return SDValue();
// Ensure the shift amount is in bounds.
@@ -53543,6 +53393,13 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
return SDValue();
+ // If we're inserting a bit, InsertBit must be known to be 0 or 1 (only its LSB may be set).
+ if (InsertBit) {
+ KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
+ if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
+ return SDValue();
+ }
+
// Split the shift into an alignment shift that moves the active i32 block to
// the bottom bits for truncation and a modulo shift that can act on the i32.
EVT AmtVT = ShAmt.getValueType();
@@ -53550,6 +53407,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
DAG.getSignedConstant(-32LL, DL, AmtVT));
SDValue ModuloAmt =
DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
+ ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
// Compute the byte offset for the i32 block that is changed by the RMW.
// combineTruncate will adjust the load for us in a similar way.
@@ -53564,13 +53422,23 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
- SDValue Mask =
- DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
- DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
- if (StoredVal.getOpcode() == ISD::AND)
- Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
+
+ SDValue Res;
+ if (InsertBit) {
+ SDValue BitMask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
+ Res =
+ DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
+ } else {
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+ }
- SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
Align(), St->getMemOperand()->getFlags());
}
@@ -54591,6 +54459,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
+ using namespace SDPatternMatch;
if (!VT.isVector() || !Subtarget.hasSSSE3())
return SDValue();
@@ -54600,42 +54469,19 @@ static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
return SDValue();
SDValue SSatVal = detectSSatPattern(In, VT);
- if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
+ if (!SSatVal)
return SDValue();
- // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
- // of multiplies from even/odd elements.
- SDValue N0 = SSatVal.getOperand(0);
- SDValue N1 = SSatVal.getOperand(1);
-
- if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
- return SDValue();
-
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
-
+ // See if this is a signed saturation of an ADD, adding pairs of multiplies
+ // from even/odd elements, from zero_extend/sign_extend operands.
+ //
// TODO: Handle constant vectors and use knownbits/computenumsignbits?
- // Canonicalize zero_extend to LHS.
- if (N01.getOpcode() == ISD::ZERO_EXTEND)
- std::swap(N00, N01);
- if (N11.getOpcode() == ISD::ZERO_EXTEND)
- std::swap(N10, N11);
-
- // Ensure we have a zero_extend and a sign_extend.
- if (N00.getOpcode() != ISD::ZERO_EXTEND ||
- N01.getOpcode() != ISD::SIGN_EXTEND ||
- N10.getOpcode() != ISD::ZERO_EXTEND ||
- N11.getOpcode() != ISD::SIGN_EXTEND)
+ SDValue N00, N01, N10, N11;
+ if (!sd_match(SSatVal,
+ m_Add(m_Mul(m_ZExt(m_Value(N00)), m_SExt(m_Value(N01))),
+ m_Mul(m_ZExt(m_Value(N10)), m_SExt(m_Value(N11))))))
return SDValue();
- // Peek through the extends.
- N00 = N00.getOperand(0);
- N01 = N01.getOperand(0);
- N10 = N10.getOperand(0);
- N11 = N11.getOperand(0);
-
// Ensure the extend is from vXi8.
if (N00.getValueType().getVectorElementType() != MVT::i8 ||
N01.getValueType().getVectorElementType() != MVT::i8 ||
@@ -54768,9 +54614,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
// Check the shift amount is byte aligned.
// Check the truncation doesn't use any shifted in (zero) top bits.
+ // Check the shift amount doesn't depend on the original load.
if (KnownAmt.countMinTrailingZeros() >= 3 &&
KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
- VT.getSizeInBits())) {
+ VT.getSizeInBits()) &&
+ !Ld->isPredecessorOf(ShAmt.getNode())) {
EVT PtrVT = Ld->getBasePtr().getValueType();
SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
SDValue PtrByteOfs =