Diffstat (limited to 'llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 356
1 file changed, 229 insertions, 127 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 904aabed..32ea219 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "LoongArch.h"
 #include "LoongArchMachineFunctionInfo.h"
 #include "LoongArchRegisterInfo.h"
+#include "LoongArchSelectionDAGInfo.h"
 #include "LoongArchSubtarget.h"
 #include "MCTargetDesc/LoongArchBaseInfo.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
@@ -76,7 +77,7 @@ static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
 
 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                  const LoongArchSubtarget &STI)
-    : TargetLowering(TM), Subtarget(STI) {
+    : TargetLowering(TM, STI), Subtarget(STI) {
 
   MVT GRLenVT = Subtarget.getGRLenVT();
 
@@ -351,6 +352,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SSUBSAT, VT, Legal);
     setOperationAction(ISD::UADDSAT, VT, Legal);
     setOperationAction(ISD::USUBSAT, VT, Legal);
+    setOperationAction(ISD::ROTL, VT, Custom);
+    setOperationAction(ISD::ROTR, VT, Custom);
   }
   for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
     setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -375,6 +378,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FFLOOR, VT, Legal);
       setOperationAction(ISD::FTRUNC, VT, Legal);
       setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+      setOperationAction(ISD::FMINNUM, VT, Legal);
+      setOperationAction(ISD::FMAXNUM, VT, Legal);
     }
     setOperationAction(ISD::CTPOP, GRLenVT, Legal);
     setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
@@ -437,6 +442,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UADDSAT, VT, Legal);
     setOperationAction(ISD::USUBSAT, VT, Legal);
     setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+    setOperationAction(ISD::ROTL, VT, Custom);
+    setOperationAction(ISD::ROTR, VT, Custom);
   }
   for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
     setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -461,6 +468,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FFLOOR, VT, Legal);
       setOperationAction(ISD::FTRUNC, VT, Legal);
       setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+      setOperationAction(ISD::FMINNUM, VT, Legal);
+      setOperationAction(ISD::FMAXNUM, VT, Legal);
     }
   }
 
@@ -596,6 +605,9 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerBF16_TO_FP(Op, DAG);
   case ISD::VECREDUCE_ADD:
     return lowerVECREDUCE_ADD(Op, DAG);
+  case ISD::ROTL:
+  case ISD::ROTR:
+    return lowerRotate(Op, DAG);
   case ISD::VECREDUCE_AND:
   case ISD::VECREDUCE_OR:
   case ISD::VECREDUCE_XOR:
@@ -610,6 +622,59 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
   return SDValue();
 }
 
+// Helper to attempt to return a cheaper, bit-inverted version of \p V.
+static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
+  // TODO: don't always ignore oneuse constraints.
+  V = peekThroughBitcasts(V);
+  EVT VT = V.getValueType();
+
+  // Match not(xor X, -1) -> X.
+  if (V.getOpcode() == ISD::XOR &&
+      (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
+       isAllOnesConstant(V.getOperand(1))))
+    return V.getOperand(0);
+
+  // Match not(extract_subvector(not(X))) -> extract_subvector(X).
+  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+    if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
+      Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
+                         V.getOperand(1));
+    }
+  }
+
+  // Match not(SplatVector(not(X))) -> SplatVector(X).
+  if (V.getOpcode() == ISD::BUILD_VECTOR) {
+    if (SDValue SplatValue =
+            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+      if (!V->isOnlyUserOf(SplatValue.getNode()))
+        return SDValue();
+
+      if (SDValue Not = isNOT(SplatValue, DAG)) {
+        Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+        return DAG.getSplat(VT, SDLoc(Not), Not);
+      }
+    }
+  }
+
+  // Match not(or(not(X),not(Y))) -> and(X, Y).
+  if (V.getOpcode() == ISD::OR &&
+      DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+      V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
+    // TODO: Handle cases with single NOT operand -> VANDN
+    if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
+      if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
+        return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
+                           DAG.getBitcast(VT, Op1));
+  }
+
+  // TODO: Add more matching patterns. Such as,
+  // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
+  // not(slt(C, X)) -> slt(X - 1, C)
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
@@ -769,6 +834,58 @@ SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
   return Op;
 }
 
+SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.isVector() && "Unexpected type");
+
+  SDLoc DL(Op);
+  SDValue R = Op.getOperand(0);
+  SDValue Amt = Op.getOperand(1);
+  unsigned Opcode = Op.getOpcode();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+  auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
+    if (V.getOpcode() != ISD::BUILD_VECTOR)
+      return false;
+    if (SDValue SplatValue =
+            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+      if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+        CstSplatValue = C->getAPIntValue();
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // Check for constant splat rotation amount.
+  APInt CstSplatValue;
+  bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
+  bool isROTL = Opcode == ISD::ROTL;
+
+  // Check for splat rotate by zero.
+  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
+    return R;
+
+  // LoongArch targets always prefer ISD::ROTR.
+  if (isROTL) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return DAG.getNode(ISD::ROTR, DL, VT, R,
+                       DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
+  }
+
+  // Rotate by an immediate.
+  if (IsCstSplat) {
+    // ISD::ROTR: Attempt to rotate by a positive immediate.
+    SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
+    if (SDValue Urem =
+            DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
+      return DAG.getNode(Opcode, DL, VT, R, Urem);
+  }
+
+  return Op;
+}
+
 // Return true if Val is equal to (setcc LHS, RHS, CC).
 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
 // Otherwise, return std::nullopt.
@@ -1708,7 +1825,7 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   // Return vshuf4i.d
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
-    return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
+    return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
                        DAG.getConstant(Imm, DL, GRLenVT));
 
   return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
@@ -2881,11 +2998,13 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
 
     if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
         VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
-      SDVTList Tys =
-          LN->isIndexed()
-              ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
-              : DAG.getVTList(VT, MVT::Other);
-      SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
+      // Indexed loads and stores are not supported on LoongArch.
+      assert(LN->isUnindexed() && "Unexpected indexed load.");
+
+      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+      // The offset operand of an unindexed load is always undefined, so there
+      // is no need to pass it to VLDREPL.
+      SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
       SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
       DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
       return BCast;
@@ -4455,7 +4574,7 @@ SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
 
 // Returns the opcode of the target-specific SDNode that implements the 32-bit
 // form of the given Opcode.
-static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
+static unsigned getLoongArchWOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     llvm_unreachable("Unexpected opcode");
@@ -4491,7 +4610,7 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
   SDLoc DL(N);
-  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
+  unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
   SDValue NewOp0, NewRes;
 
   switch (NumOp) {
@@ -5050,6 +5169,33 @@ void LoongArchTargetLowering::ReplaceNodeResults(
   }
 }
 
+/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
+static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
+                                      SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
+
+  MVT VT = N->getSimpleValueType(0);
+  if (!VT.is128BitVector() && !VT.is256BitVector())
+    return SDValue();
+
+  SDValue X, Y;
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (SDValue Not = isNOT(N0, DAG)) {
+    X = Not;
+    Y = N1;
+  } else if (SDValue Not = isNOT(N1, DAG)) {
+    X = Not;
+    Y = N0;
+  } else
+    return SDValue();
+
+  X = DAG.getBitcast(VT, X);
+  Y = DAG.getBitcast(VT, Y);
+  return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
+}
+
 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const LoongArchSubtarget &Subtarget) {
@@ -5067,6 +5213,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
   SDValue NewOperand;
   MVT GRLenVT = Subtarget.getGRLenVT();
 
+  if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
+    return R;
+
   // BSTRPICK requires the 32S feature.
   if (!Subtarget.has32S())
     return SDValue();
@@ -6626,6 +6775,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
                        N->getOperand(1));
     break;
+  case Intrinsic::loongarch_lasx_concat_128_s:
+  case Intrinsic::loongarch_lasx_concat_128_d:
+  case Intrinsic::loongarch_lasx_concat_128:
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
   }
   return SDValue();
 }
@@ -6739,6 +6893,69 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
+static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const LoongArchSubtarget &Subtarget) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+
+  // VANDN(undef, x) -> 0
+  // VANDN(x, undef) -> 0
+  if (N0.isUndef() || N1.isUndef())
+    return DAG.getConstant(0, DL, VT);
+
+  // VANDN(0, x) -> x
+  if (ISD::isBuildVectorAllZeros(N0.getNode()))
+    return N1;
+
+  // VANDN(x, 0) -> 0
+  if (ISD::isBuildVectorAllZeros(N1.getNode()))
+    return DAG.getConstant(0, DL, VT);
+
+  // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
+  if (ISD::isBuildVectorAllOnes(N1.getNode()))
+    return DAG.getNOT(DL, N0, VT);
+
+  // Turn VANDN back to AND if input is inverted.
+  if (SDValue Not = isNOT(N0, DAG))
+    return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
+
+  // Folds for better commutativity:
+  if (N1->hasOneUse()) {
+    // VANDN(x, NOT(y)) -> AND(NOT(x), NOT(y)) -> NOT(OR(x, y)).
+    if (SDValue Not = isNOT(N1, DAG))
+      return DAG.getNOT(
+          DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
+
+    // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
+    //                            -> NOT(OR(x, SplatVector(~Imm)))
+    // The combine is only performed when VT is v16i8/v32i8, so that `vnori.b`
+    // can be used to gain benefits.
+    if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
+        N1.getOpcode() == ISD::BUILD_VECTOR) {
+      if (SDValue SplatValue =
+              cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
+        if (!N1->isOnlyUserOf(SplatValue.getNode()))
+          return SDValue();
+
+        if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+          uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
+          SDValue Not =
+              DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
+          return DAG.getNOT(
+              DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
+              VT);
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -6774,6 +6991,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
   case ISD::EXTRACT_VECTOR_ELT:
     return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+  case LoongArchISD::VANDN:
+    return performVANDNCombine(N, DAG, DCI, Subtarget);
   }
   return SDValue();
 }
@@ -7474,123 +7693,6 @@ bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
   return true;
 }
 
-const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
-  switch ((LoongArchISD::NodeType)Opcode) {
-  case LoongArchISD::FIRST_NUMBER:
-    break;
-
-#define NODE_NAME_CASE(node)                                                   \
-  case LoongArchISD::node:                                                     \
-    return "LoongArchISD::" #node;
-
-    // TODO: Add more target-dependent nodes later.
-    NODE_NAME_CASE(CALL)
-    NODE_NAME_CASE(CALL_MEDIUM)
-    NODE_NAME_CASE(CALL_LARGE)
-    NODE_NAME_CASE(RET)
-    NODE_NAME_CASE(TAIL)
-    NODE_NAME_CASE(TAIL_MEDIUM)
-    NODE_NAME_CASE(TAIL_LARGE)
-    NODE_NAME_CASE(SELECT_CC)
-    NODE_NAME_CASE(BR_CC)
-    NODE_NAME_CASE(BRCOND)
-    NODE_NAME_CASE(SLL_W)
-    NODE_NAME_CASE(SRA_W)
-    NODE_NAME_CASE(SRL_W)
-    NODE_NAME_CASE(BSTRINS)
-    NODE_NAME_CASE(BSTRPICK)
-    NODE_NAME_CASE(MOVGR2FR_W)
-    NODE_NAME_CASE(MOVGR2FR_W_LA64)
-    NODE_NAME_CASE(MOVGR2FR_D)
-    NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
-    NODE_NAME_CASE(MOVFR2GR_S_LA64)
-    NODE_NAME_CASE(FTINT)
-    NODE_NAME_CASE(BUILD_PAIR_F64)
-    NODE_NAME_CASE(SPLIT_PAIR_F64)
-    NODE_NAME_CASE(REVB_2H)
-    NODE_NAME_CASE(REVB_2W)
-    NODE_NAME_CASE(BITREV_4B)
-    NODE_NAME_CASE(BITREV_8B)
-    NODE_NAME_CASE(BITREV_W)
-    NODE_NAME_CASE(ROTR_W)
-    NODE_NAME_CASE(ROTL_W)
-    NODE_NAME_CASE(DIV_W)
-    NODE_NAME_CASE(DIV_WU)
-    NODE_NAME_CASE(MOD_W)
-    NODE_NAME_CASE(MOD_WU)
-    NODE_NAME_CASE(CLZ_W)
-    NODE_NAME_CASE(CTZ_W)
-    NODE_NAME_CASE(DBAR)
-    NODE_NAME_CASE(IBAR)
-    NODE_NAME_CASE(BREAK)
-    NODE_NAME_CASE(SYSCALL)
-    NODE_NAME_CASE(CRC_W_B_W)
-    NODE_NAME_CASE(CRC_W_H_W)
-    NODE_NAME_CASE(CRC_W_W_W)
-    NODE_NAME_CASE(CRC_W_D_W)
-    NODE_NAME_CASE(CRCC_W_B_W)
-    NODE_NAME_CASE(CRCC_W_H_W)
-    NODE_NAME_CASE(CRCC_W_W_W)
-    NODE_NAME_CASE(CRCC_W_D_W)
-    NODE_NAME_CASE(CSRRD)
-    NODE_NAME_CASE(CSRWR)
-    NODE_NAME_CASE(CSRXCHG)
-    NODE_NAME_CASE(IOCSRRD_B)
-    NODE_NAME_CASE(IOCSRRD_H)
-    NODE_NAME_CASE(IOCSRRD_W)
-    NODE_NAME_CASE(IOCSRRD_D)
-    NODE_NAME_CASE(IOCSRWR_B)
-    NODE_NAME_CASE(IOCSRWR_H)
-    NODE_NAME_CASE(IOCSRWR_W)
-    NODE_NAME_CASE(IOCSRWR_D)
-    NODE_NAME_CASE(CPUCFG)
-    NODE_NAME_CASE(MOVGR2FCSR)
-    NODE_NAME_CASE(MOVFCSR2GR)
-    NODE_NAME_CASE(CACOP_D)
-    NODE_NAME_CASE(CACOP_W)
-    NODE_NAME_CASE(VSHUF)
-    NODE_NAME_CASE(VPICKEV)
-    NODE_NAME_CASE(VPICKOD)
-    NODE_NAME_CASE(VPACKEV)
-    NODE_NAME_CASE(VPACKOD)
-    NODE_NAME_CASE(VILVL)
-    NODE_NAME_CASE(VILVH)
-    NODE_NAME_CASE(VSHUF4I)
-    NODE_NAME_CASE(VREPLVEI)
-    NODE_NAME_CASE(VREPLGR2VR)
-    NODE_NAME_CASE(XVPERMI)
-    NODE_NAME_CASE(XVPERM)
-    NODE_NAME_CASE(XVREPLVE0)
-    NODE_NAME_CASE(XVREPLVE0Q)
-    NODE_NAME_CASE(XVINSVE0)
-    NODE_NAME_CASE(VPICK_SEXT_ELT)
-    NODE_NAME_CASE(VPICK_ZEXT_ELT)
-    NODE_NAME_CASE(VREPLVE)
-    NODE_NAME_CASE(VALL_ZERO)
-    NODE_NAME_CASE(VANY_ZERO)
-    NODE_NAME_CASE(VALL_NONZERO)
-    NODE_NAME_CASE(VANY_NONZERO)
-    NODE_NAME_CASE(FRECIPE)
-    NODE_NAME_CASE(FRSQRTE)
-    NODE_NAME_CASE(VSLLI)
-    NODE_NAME_CASE(VSRLI)
-    NODE_NAME_CASE(VBSLL)
-    NODE_NAME_CASE(VBSRL)
-    NODE_NAME_CASE(VLDREPL)
-    NODE_NAME_CASE(VMSKLTZ)
-    NODE_NAME_CASE(VMSKGEZ)
-    NODE_NAME_CASE(VMSKEQZ)
-    NODE_NAME_CASE(VMSKNEZ)
-    NODE_NAME_CASE(XVMSKLTZ)
-    NODE_NAME_CASE(XVMSKGEZ)
-    NODE_NAME_CASE(XVMSKEQZ)
-    NODE_NAME_CASE(XVMSKNEZ)
-    NODE_NAME_CASE(VHADDW)
-  }
-#undef NODE_NAME_CASE
-  return nullptr;
-}
-
 //===----------------------------------------------------------------------===//
 // Calling Convention Implementation
 //===----------------------------------------------------------------------===//
@@ -8810,7 +8912,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
 }
 
 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                                 const CallInst &I,
+                                                 const CallBase &I,
                                                  MachineFunction &MF,
                                                  unsigned Intrinsic) const {
   switch (Intrinsic) {
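Note: the new `lowerRotate` canonicalizes ISD::ROTL to ISD::ROTR by negating the amount, relying on the identity rotl(x, n) == rotr(x, -n) taken modulo the element width. The following is a minimal standalone C++ check of that identity; it is a sketch with our own helper names (rotl32/rotr32), not code from the patch, and it models one 32-bit lane of the vector operation.

#include <cassert>
#include <cstdint>

// Scalar models of per-element rotates; the vector lowering in the patch
// applies the same identity lane-wise. The n %= 32 guard mirrors the
// urem(EltSizeInBits) handling in lowerRotate and avoids UB on shift-by-32.
static uint32_t rotr32(uint32_t x, uint32_t n) {
  n %= 32;
  return n == 0 ? x : (x >> n) | (x << (32 - n));
}

static uint32_t rotl32(uint32_t x, uint32_t n) {
  n %= 32;
  return n == 0 ? x : (x << n) | (x >> (32 - n));
}

int main() {
  for (uint32_t n = 0; n < 64; ++n) {
    uint32_t x = 0xDEADBEEFu;
    // rotl(x, n) == rotr(x, 0 - n): the unsigned subtraction wraps modulo
    // the element width, which is exactly the SUB(0, Amt) the lowering emits
    // before switching the opcode to ISD::ROTR.
    assert(rotl32(x, n) == rotr32(x, 0u - n));
  }
  return 0;
}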
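Similarly, the rewrites in `performVANDNCombine` are instances of De Morgan's laws over vandn(a, b) = AND(NOT(a), b). A brute-force byte-level check of the two OR-based folds (again a standalone sketch; the operand order, with the inverted operand first, follows `combineAndNotIntoVANDN`):

#include <cassert>
#include <cstdint>

// Scalar model of the patch's VANDN node: vandn(a, b) = AND(NOT(a), b).
static uint8_t vandn(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(~a & b);
}

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t a = static_cast<uint8_t>(x);
      uint8_t b = static_cast<uint8_t>(y);
      // VANDN(x, NOT(y)) -> NOT(OR(x, y)), the De Morgan rewrite used when
      // the second operand is itself inverted.
      assert(vandn(a, static_cast<uint8_t>(~b)) ==
             static_cast<uint8_t>(~(a | b)));
      // VANDN(x, SplatVector(Imm)) -> NOT(OR(x, SplatVector(~Imm))): per
      // byte this is the `vnori.b` form, with NCVal = ~Imm as in the code.
      uint8_t NCVal = static_cast<uint8_t>(~b);
      assert(vandn(a, b) == static_cast<uint8_t>(~(a | NCVal)));
    }
  }
  return 0;
}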
