diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 21 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 12 |
3 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b4b86a9..6bcb28a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9925,9 +9925,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT, Subtarget, DAG, DL); SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL); SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1; - return DAG.getSelect(DL, VT, VMask, - DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector), - ZeroVector); + return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask); } static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, @@ -22043,9 +22041,15 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - DataToCompress), - Mask, PassThru, Subtarget, DAG); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + + // Avoid false dependency. + if (PassThru.isUndef()) + PassThru = DAG.getConstant(0, dl, VT); + + return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, + Mask); } case FIXUPIMMS: case FIXUPIMMS_MASKZ: diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index bc8475c..9509a04 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10546,7 +10546,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", - (_.VT (X86compress _.RC:$src1))>, AVX5128IBase, + (null_frag)>, AVX5128IBase, Sched<[sched]>; let mayStore = 1, hasSideEffects = 0 in @@ -10568,6 +10568,13 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), (!cast<Instruction>(Name#_.ZSuffix##mrk) addr:$dst, _.KRCWM:$mask, _.RC:$src)>; + + def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix##rrk) + _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; + def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix##rrkz) + _.KRCWM:$mask, _.RC:$src)>; } multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, @@ -10601,13 +10608,12 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", - (_.VT (X86expand _.RC:$src1))>, AVX5128IBase, + (null_frag)>, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", - (_.VT (X86expand (_.VT (bitconvert - (_.LdFrag addr:$src1)))))>, + (null_frag)>, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10626,6 +10632,13 @@ multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { (_.VT _.RC:$src0))), (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0, _.KRCWM:$mask, addr:$src)>; + + def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix##rrk) + _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; + def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix##rrkz) + _.KRCWM:$mask, _.RC:$src)>; } multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 03baf80..6b9b28b 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -524,10 +524,14 @@ def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>; def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>; def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>; -def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1, - [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; -def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, - [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; +def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<0, 3>]>, []>; +def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, + SDTCisSameNumEltsAs<0, 3>]>, []>; // vpshufbitqmb def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB", |