aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp16
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td21
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td12
3 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b4b86a9..6bcb28a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9925,9 +9925,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
Subtarget, DAG, DL);
SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
- return DAG.getSelect(DL, VT, VMask,
- DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
- ZeroVector);
+ return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
}
static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
@@ -22043,9 +22041,15 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (isAllOnesConstant(Mask)) // return data as is
return Op.getOperand(1);
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- DataToCompress),
- Mask, PassThru, Subtarget, DAG);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ // Avoid false dependency.
+ if (PassThru.isUndef())
+ PassThru = DAG.getConstant(0, dl, VT);
+
+ return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru,
+ Mask);
}
case FIXUPIMMS:
case FIXUPIMMS_MASKZ:
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index bc8475c..9509a04 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10546,7 +10546,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
- (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
+ (null_frag)>, AVX5128IBase,
Sched<[sched]>;
let mayStore = 1, hasSideEffects = 0 in
@@ -10568,6 +10568,13 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
+
+ def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
+ def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ _.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -10601,13 +10608,12 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
- (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
+ (null_frag)>, AVX5128IBase,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
- (_.VT (X86expand (_.VT (bitconvert
- (_.LdFrag addr:$src1)))))>,
+ (null_frag)>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -10626,6 +10632,13 @@ multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
(_.VT _.RC:$src0))),
(!cast<Instruction>(Name#_.ZSuffix##rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
+
+ def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
+ def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ _.KRCWM:$mask, _.RC:$src)>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 03baf80..6b9b28b 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -524,10 +524,14 @@ def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>;
def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>;
def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>;
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
- [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
-def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
- [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>,
+ SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<0, 3>]>, []>;
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>,
+ SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<0, 3>]>, []>;
// vpshufbitqmb
def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",