diff options
author | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-09 18:31:57 +0900 |
---|---|---|
committer | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-09 18:33:27 +0900 |
commit | df025ebf872052c0761d44a3ef9b65e9675af8a8 (patch) | |
tree | 9b4e94583e2536546d6606270bcdf846c95e1ba2 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
parent | 4428c9d0b1344179f85a72e183a44796976521e3 (diff) | |
parent | bdcf47e4bcb92889665825654bb80a8bbe30379e (diff) | |
download | llvm-users/chapuni/cov/single/loop.zip llvm-users/chapuni/cov/single/loop.tar.gz llvm-users/chapuni/cov/single/loop.tar.bz2 |
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/loop (branch: users/chapuni/cov/single/loop)
Conflicts:
clang/lib/CodeGen/CoverageMappingGen.cpp
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 134 |
1 files changed, 122 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e7f6032e..6b0eb38 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ static cl::opt<int> BrMergingCcmpBias( static cl::opt<bool> WidenShift("x86-widen-shift", cl::init(true), - cl::desc("Replacte narrow shifts with wider shifts."), + cl::desc("Replace narrow shifts with wider shifts."), cl::Hidden); static cl::opt<int> BrMergingLikelyBias( @@ -341,8 +341,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } if (Subtarget.hasAVX10_2()) { - setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom); + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom); + for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, + MVT::v4i64}) { + setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal); + setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal); + } + if (Subtarget.hasAVX10_2_512()) { + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v8i64, Legal); + } if (Subtarget.is64Bit()) { setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal); setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal); @@ -623,6 +632,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMAXNUM, VT, Action); setOperationAction(ISD::FMINIMUM, VT, Action); setOperationAction(ISD::FMAXIMUM, VT, Action); + setOperationAction(ISD::FMINIMUMNUM, VT, Action); + setOperationAction(ISD::FMAXIMUMNUM, VT, Action); setOperationAction(ISD::FSIN, VT, Action); setOperationAction(ISD::FCOS, VT, Action); setOperationAction(ISD::FSINCOS, VT, Action); @@ -1066,6 +1077,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom); 
setOperationAction(ISD::FMINIMUM, MVT::f32, Custom); + setOperationAction(ISD::FMAXIMUMNUM, MVT::f32, Custom); + setOperationAction(ISD::FMINIMUMNUM, MVT::f32, Custom); setOperationAction(ISD::FNEG, MVT::v4f32, Custom); setOperationAction(ISD::FABS, MVT::v4f32, Custom); @@ -1108,6 +1121,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) { setOperationAction(ISD::FMAXIMUM, VT, Custom); setOperationAction(ISD::FMINIMUM, VT, Custom); + setOperationAction(ISD::FMAXIMUMNUM, VT, Custom); + setOperationAction(ISD::FMINIMUMNUM, VT, Custom); } for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, @@ -1473,6 +1488,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMAXIMUM, VT, Custom); setOperationAction(ISD::FMINIMUM, VT, Custom); + setOperationAction(ISD::FMAXIMUMNUM, VT, Custom); + setOperationAction(ISD::FMINIMUMNUM, VT, Custom); setOperationAction(ISD::FCANONICALIZE, VT, Custom); } @@ -1818,6 +1835,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::FMAXIMUM, VT, Custom); setOperationAction(ISD::FMINIMUM, VT, Custom); + setOperationAction(ISD::FMAXIMUMNUM, VT, Custom); + setOperationAction(ISD::FMINIMUMNUM, VT, Custom); setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FABS, VT, Custom); setOperationAction(ISD::FMA, VT, Legal); @@ -2289,6 +2308,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom); setOperationAction(ISD::FMINIMUM, MVT::f16, Custom); + setOperationAction(ISD::FMAXIMUMNUM, MVT::f16, Custom); + setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); @@ -2336,6 +2357,8 @@ 
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMINIMUM, MVT::v32f16, Custom); setOperationAction(ISD::FMAXIMUM, MVT::v32f16, Custom); + setOperationAction(ISD::FMINIMUMNUM, MVT::v32f16, Custom); + setOperationAction(ISD::FMAXIMUMNUM, MVT::v32f16, Custom); } if (Subtarget.hasVLX()) { @@ -2383,9 +2406,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMINIMUM, MVT::v8f16, Custom); setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Custom); + setOperationAction(ISD::FMINIMUMNUM, MVT::v8f16, Custom); + setOperationAction(ISD::FMAXIMUMNUM, MVT::v8f16, Custom); setOperationAction(ISD::FMINIMUM, MVT::v16f16, Custom); setOperationAction(ISD::FMAXIMUM, MVT::v16f16, Custom); + setOperationAction(ISD::FMINIMUMNUM, MVT::v16f16, Custom); + setOperationAction(ISD::FMAXIMUMNUM, MVT::v16f16, Custom); } } @@ -2442,6 +2469,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSQRT, VT, Legal); setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::FMINIMUM, VT, Custom); + setOperationAction(ISD::FMAXIMUM, VT, Custom); + setOperationAction(ISD::FMINIMUMNUM, VT, Custom); + setOperationAction(ISD::FMAXIMUMNUM, VT, Custom); } if (Subtarget.hasAVX10_2_512()) { setOperationAction(ISD::FADD, MVT::v32bf16, Legal); @@ -2451,6 +2482,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal); setOperationAction(ISD::FMA, MVT::v32bf16, Legal); setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); + setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom); + setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom); + setOperationAction(ISD::FMINIMUMNUM, MVT::v32bf16, Custom); + setOperationAction(ISD::FMAXIMUMNUM, MVT::v32bf16, Custom); } for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { setCondCodeAction(ISD::SETOEQ, VT, Custom); @@ -2652,6 +2687,8 @@ 
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, + ISD::FP_TO_SINT_SAT, + ISD::FP_TO_UINT_SAT, ISD::SETCC, ISD::MUL, ISD::XOR, @@ -28835,19 +28872,35 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget, static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert((Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMINIMUM) && - "Expected FMAXIMUM or FMINIMUM opcode"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = Op.getValueType(); SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); SDLoc DL(Op); + bool IsMaxOp = + Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMAXIMUMNUM; + bool IsNum = + Op.getOpcode() == ISD::FMINIMUMNUM || Op.getOpcode() == ISD::FMAXIMUMNUM; + if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) { + unsigned Opc = 0; + if (VT.isVector()) + Opc = X86ISD::VMINMAX; + else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64) + Opc = X86ISD::VMINMAXS; + + if (Opc) { + SDValue Imm = + DAG.getTargetConstant(IsMaxOp + (IsNum ? 16 : 0), DL, MVT::i32); + return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags()); + } + } + uint64_t SizeInBits = VT.getScalarSizeInBits(); APInt PreferredZero = APInt::getZero(SizeInBits); APInt OppositeZero = PreferredZero; EVT IVT = VT.changeTypeToInteger(); X86ISD::NodeType MinMaxOp; - if (Op.getOpcode() == ISD::FMAXIMUM) { + if (IsMaxOp) { MinMaxOp = X86ISD::FMAX; OppositeZero.setSignBit(); } else { @@ -28977,7 +29030,9 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, if (IgnoreNaN || DAG.isKnownNeverNaN(NewX)) return MinMax; - SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NewX, NewX, ISD::SETUO); + SDValue IsNaN = + DAG.getSetCC(DL, SetCCType, NewX, NewX, IsNum ? 
ISD::SETO : ISD::SETUO); + return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax); } @@ -33235,6 +33290,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG); case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: return LowerFMINIMUM_FMAXIMUM(Op, Subtarget, DAG); case ISD::ABS: return LowerABS(Op, Subtarget, DAG); case ISD::ABDS: @@ -33647,6 +33704,26 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: { + if (!Subtarget.hasAVX10_2()) + return; + + bool IsSigned = Opc == ISD::FP_TO_SINT_SAT; + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDValue Res; + + if (VT == MVT::v2i32 && OpVT == MVT::v2f64) { + if (IsSigned) + Res = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, Op); + else + Res = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, Op); + Results.push_back(Res); + } + return; + } case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: @@ -34627,6 +34704,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VPERMV3) NODE_NAME_CASE(VPERMI) NODE_NAME_CASE(VPTERNLOG) + NODE_NAME_CASE(FP_TO_SINT_SAT) + NODE_NAME_CASE(FP_TO_UINT_SAT) NODE_NAME_CASE(VFIXUPIMM) NODE_NAME_CASE(VFIXUPIMM_SAE) NODE_NAME_CASE(VFIXUPIMMS) @@ -41615,6 +41694,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { MVT VT = N.getSimpleValueType(); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> Mask; unsigned Opcode = N.getOpcode(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -41900,7 +41981,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, APInt Mask = APInt::getHighBitsSet(64, 32); if (DAG.MaskedValueIsZero(In, Mask)) { SDValue Trunc = 
DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In); - MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); + MVT VecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc); SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec); return DAG.getBitcast(VT, Movl); @@ -41915,7 +41996,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, // Create a vector constant - scalar constant followed by zeros. EVT ScalarVT = N0.getOperand(0).getValueType(); Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext()); - unsigned NumElts = VT.getVectorNumElements(); Constant *Zero = ConstantInt::getNullValue(ScalarTy); SmallVector<Constant *, 32> ConstantVec(NumElts, Zero); ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue()); @@ -41966,9 +42046,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, MVT SrcVT = N0.getOperand(0).getSimpleValueType(); unsigned SrcBits = SrcVT.getScalarSizeInBits(); if ((EltBits % SrcBits) == 0 && SrcBits >= 32) { - unsigned Size = VT.getVectorNumElements(); unsigned NewSize = SrcVT.getVectorNumElements(); - APInt BlendMask = N.getConstantOperandAPInt(2).zextOrTrunc(Size); + APInt BlendMask = N.getConstantOperandAPInt(2).zextOrTrunc(NumElts); APInt NewBlendMask = APIntOps::ScaleBitMask(BlendMask, NewSize); return DAG.getBitcast( VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), @@ -42381,7 +42460,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 
0 : 2; DMask[DOffset + 0] = DOffset + 1; DMask[DOffset + 1] = DOffset + 0; - MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); + MVT DVT = MVT::getVectorVT(MVT::i32, NumElts / 2); V = DAG.getBitcast(DVT, V); V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); @@ -45976,6 +46055,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, case ISD::FMAXNUM_IEEE: case ISD::FMAXIMUM: case ISD::FMINIMUM: + case ISD::FMAXIMUMNUM: + case ISD::FMINIMUMNUM: case X86ISD::FMAX: case X86ISD::FMIN: case ISD::FABS: // Begin 1 operand @@ -56184,6 +56265,33 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS +static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + if (!Subtarget.hasAVX10_2()) + return SDValue(); + + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DstVT = N->getValueType(0); + SDLoc dl(N); + + if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) { + SDValue V2F32Value = DAG.getUNDEF(SrcVT); + + // Concatenate the original v2f32 input and V2F32Value to create v4f32 + SDValue NewSrc = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + N->getOperand(0), V2F32Value); + + // Select the FP_TO_SINT_SAT/FP_TO_UINT_SAT node + if (IsSigned) + return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, NewSrc); + + return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, NewSrc); + } + return SDValue(); +} + static bool needCarryOrOverflowFlag(SDValue Flags) { assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); @@ -59297,6 +59405,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI); case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI); case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI); + case 
ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget); // clang-format on } |