aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td48
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td23
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td10
5 files changed, 43 insertions, 51 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7eb67a..cd04ff5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31215,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
unsigned NumElts = VT.getVectorNumElements();
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
- if (IsFSHR)
- std::swap(Op0, Op1);
if (IsCstSplat) {
+ if (IsFSHR)
+ std::swap(Op0, Op1);
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
{Op0, Op1, Imm}, DAG, Subtarget);
}
- return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
@@ -35139,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VALIGN)
NODE_NAME_CASE(VSHLD)
NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(VSHLDV)
- NODE_NAME_CASE(VSHRDV)
NODE_NAME_CASE(PSHUFD)
NODE_NAME_CASE(PSHUFHW)
NODE_NAME_CASE(PSHUFLW)
@@ -45171,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
return true;
+ case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
@@ -45241,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case X86ISD::BLENDV:
return false;
// SSE target shuffles.
+ case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::UNPCKL:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8ab8c66..b55556a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -471,8 +471,7 @@ namespace llvm {
// VBMI2 Concat & Shift.
VSHLD,
VSHRD,
- VSHLDV,
- VSHRDV,
+
// Shuffle Packed Values at 128-bit granularity.
SHUF128,
MOVDDUP,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2371ed4..564810c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12300,72 +12300,76 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
// VBMI2
//===----------------------------------------------------------------------===//
-multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in {
defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
+ !if(SwapLR,
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src3))),
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src3))))>,
T8, PD, EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (VTI.LdFrag addr:$src3))))>,
+ !if(SwapLR,
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src3)))),
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.LdFrag addr:$src3)))))>,
T8, PD, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
-multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
- : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
+ : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched, VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
"${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
- (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
+ !if(SwapLR,
+ (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))),
+ (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))))>,
T8, PD, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+ defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+ defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
EVEX_V256;
- defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+ defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
-multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+ defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+ defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
EVEX_V256;
- defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+ defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
- SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
+ SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched> {
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, SwapLR, sched,
avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
- defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, SwapLR, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
- defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, SwapLR, sched,
avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}
@@ -12381,8 +12385,8 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
}
// Concat & Shift
-defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
-defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
+defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", fshl, 0, SchedWriteVecIMul>;
+defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", fshr, 1, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index b4768590..031fdc1 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -25,18 +25,12 @@ let SchedRW = [WriteLEA] in {
[(set GR32:$dst, lea32addr:$src)]>,
OpSize32, Requires<[Not64BitMode]>;
- let Predicates = [HasNDD], isCodeGenOnly = 1 in {
- def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR8:$dst), (ins lea64_8mem:$src),
- "lea{b}\t{$src|$dst}, {$dst|$src}",
- [(set GR8:$dst, lea64_iaddr:$src)]>,
- OpSize16,
- Requires<[In64BitMode]>;
-
- def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins lea64_16mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}",
- [(set GR16:$dst, lea64_iaddr:$src)]>,
- OpSize16,
- Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in {
+ def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_8mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
+
+ def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_16mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
}
def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src),
@@ -51,6 +45,11 @@ let SchedRW = [WriteLEA] in {
[(set GR64:$dst, lea64addr:$src)]>;
} // SchedRW
+let Predicates = [HasNDD] in {
+ def : Pat<(i8 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_8r lea64_8mem:$src), sub_8bit)>;
+ def : Pat<(i16 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_16r lea64_16mem:$src), sub_16bit)>;
+}
+
// Pseudo instruction for lea that prevent optimizer from eliminating
// the instruction.
let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0c20ffe..5321ecf 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -406,16 +406,6 @@ def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
-def X86VShldv : SDNode<"X86ISD::VSHLDV",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>>;
-def X86VShrdv : SDNode<"X86ISD::VSHRDV",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;