Diffstat (limited to 'llvm/lib/Target/LoongArch'): 23 files changed, 690 insertions, 379 deletions
diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt index 0f674b1..8689d09 100644 --- a/llvm/lib/Target/LoongArch/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM LoongArchGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(LoongArchCommonTableGen) @@ -27,6 +28,7 @@ add_llvm_target(LoongArchCodeGen LoongArchMergeBaseOffset.cpp LoongArchOptWInstrs.cpp LoongArchRegisterInfo.cpp + LoongArchSelectionDAGInfo.cpp LoongArchSubtarget.cpp LoongArchTargetMachine.cpp LoongArchTargetTransformInfo.cpp diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 6497ff9..67f07f0 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -202,6 +202,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, // Define the LoongArch target. //===----------------------------------------------------------------------===// +defm : RemapAllTargetPseudoPointerOperands<GPR>; + def LoongArchInstrInfo : InstrInfo { let guessInstructionProperties = 0; } diff --git a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp index 0ccebeb3..6358e348 100644 --- a/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchDeadRegisterDefinitions.cpp @@ -60,7 +60,6 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction( return false; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS(); LLVM_DEBUG(dbgs() << "***** LoongArchDeadRegisterDefinitions *****\n"); @@ -86,7 +85,7 @@ bool LoongArchDeadRegisterDefinitions::runOnMachineFunction( continue; LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n "; MI.print(dbgs())); - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I); if (!(RC && RC->contains(LoongArch::R0))) { LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); continue; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index e86b21c..32954b6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -30,13 +30,18 @@ def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; // ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point // comparisons to prevent recursive lowering. 
def loongarch_brcond : SDNode<"LoongArchISD::BRCOND", SDTBrcond, [SDNPHasChain]>; + +// FPR<->GPR transfer operations def loongarch_movgr2fr_w : SDNode<"LoongArchISD::MOVGR2FR_W", SDT_LoongArchMOVGR2FR_W>; def loongarch_movgr2fr_w_la64 : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>; def loongarch_movfr2gr_s_la64 : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>; + def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>; + +// Floating point approximate reciprocal operation def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>; def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 2e88254..e6cad1b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -20,6 +20,7 @@ def SDT_LoongArchMOVGR2FR_D_LO_HI : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +// FPR<->GPR transfer operations def loongarch_movgr2fr_d : SDNode<"LoongArchISD::MOVGR2FR_D", SDT_LoongArchMOVGR2FR_D>; def loongarch_movgr2fr_d_lo_hi diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 1493bf4..690b063 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -449,7 +449,7 @@ bool LoongArchFrameLowering::spillCalleeSavedRegisters( bool IsKill = !(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken()); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, TRI, + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, CS.getFrameIdx(), RC, Register()); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index 1eed877..4c8dcb8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H #include "LoongArch.h" +#include "LoongArchSelectionDAGInfo.h" #include "LoongArchTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index a6de839..32ea219 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -15,6 +15,7 @@ #include "LoongArch.h" #include "LoongArchMachineFunctionInfo.h" #include "LoongArchRegisterInfo.h" +#include "LoongArchSelectionDAGInfo.h" #include "LoongArchSubtarget.h" #include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" @@ -76,7 +77,7 @@ static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI) - : TargetLowering(TM), Subtarget(STI) { + : TargetLowering(TM, STI), Subtarget(STI) { MVT GRLenVT = Subtarget.getGRLenVT(); @@ -351,6 +352,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SSUBSAT, VT, Legal); setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction(ISD::ROTL, VT, Custom); + 
setOperationAction(ISD::ROTR, VT, Custom); } for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(ISD::BITREVERSE, VT, Custom); @@ -371,6 +374,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETUGT}, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); + setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FMAXNUM, VT, Legal); } setOperationAction(ISD::CTPOP, GRLenVT, Legal); setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal); @@ -433,6 +442,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32}) setOperationAction(ISD::BITREVERSE, VT, Custom); @@ -453,6 +464,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETUGT}, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); + setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FMAXNUM, VT, Legal); } } @@ -588,6 +605,9 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerBF16_TO_FP(Op, DAG); case ISD::VECREDUCE_ADD: return lowerVECREDUCE_ADD(Op, DAG); + case ISD::ROTL: + case ISD::ROTR: + return lowerRotate(Op, DAG); case ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: case ISD::VECREDUCE_XOR: @@ -602,6 +622,59 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return SDValue(); } +// Helper to attempt to return a cheaper, bit-inverted version of \p V. +static SDValue isNOT(SDValue V, SelectionDAG &DAG) { + // TODO: don't always ignore oneuse constraints. + V = peekThroughBitcasts(V); + EVT VT = V.getValueType(); + + // Match not(xor X, -1) -> X. + if (V.getOpcode() == ISD::XOR && + (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) || + isAllOnesConstant(V.getOperand(1)))) + return V.getOperand(0); + + // Match not(extract_subvector(not(X)) -> extract_subvector(X). + if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && + (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { + if (SDValue Not = isNOT(V.getOperand(0), DAG)) { + Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not, + V.getOperand(1)); + } + } + + // Match not(SplatVector(not(X)) -> SplatVector(X). + if (V.getOpcode() == ISD::BUILD_VECTOR) { + if (SDValue SplatValue = + cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) { + if (!V->isOnlyUserOf(SplatValue.getNode())) + return SDValue(); + + if (SDValue Not = isNOT(SplatValue, DAG)) { + Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); + return DAG.getSplat(VT, SDLoc(Not), Not); + } + } + } + + // Match not(or(not(X),not(Y))) -> and(X, Y). 
+ if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) && + V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) { + // TODO: Handle cases with single NOT operand -> VANDN + if (SDValue Op1 = isNOT(V.getOperand(1), DAG)) + if (SDValue Op0 = isNOT(V.getOperand(0), DAG)) + return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0), + DAG.getBitcast(VT, Op1)); + } + + // TODO: Add more matching patterns. Such as, + // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y). + // not(slt(C, X)) -> slt(X - 1, C) + + return SDValue(); +} + SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -761,6 +834,58 @@ SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op, return Op; } +SDValue LoongArchTargetLowering::lowerRotate(SDValue Op, + SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + assert(VT.isVector() && "Unexpected type"); + + SDLoc DL(Op); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + unsigned Opcode = Op.getOpcode(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) { + if (V.getOpcode() != ISD::BUILD_VECTOR) + return false; + if (SDValue SplatValue = + cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) { + if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) { + CstSplatValue = C->getAPIntValue(); + return true; + } + } + return false; + }; + + // Check for constant splat rotation amount. + APInt CstSplatValue; + bool IsCstSplat = checkCstSplat(Amt, CstSplatValue); + bool isROTL = Opcode == ISD::ROTL; + + // Check for splat rotate by zero. + if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0) + return R; + + // LoongArch targets always prefer ISD::ROTR. + if (isROTL) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::ROTR, DL, VT, R, + DAG.getNode(ISD::SUB, DL, VT, Zero, Amt)); + } + + // Rotate by a immediate. + if (IsCstSplat) { + // ISD::ROTR: Attemp to rotate by a positive immediate. + SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT); + if (SDValue Urem = + DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits})) + return DAG.getNode(Opcode, DL, VT, R, Urem); + } + + return Op; +} + // Return true if Val is equal to (setcc LHS, RHS, CC). // Return false if Val is the inverse of (setcc LHS, RHS, CC). // Otherwise, return std::nullopt. @@ -1700,7 +1825,7 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, // Return vshuf4i.d if (VT == MVT::v2f64 || VT == MVT::v2i64) - return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2, + return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2, DAG.getConstant(Imm, DL, GRLenVT)); return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, @@ -2873,11 +2998,13 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) && VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) { - SDVTList Tys = - LN->isIndexed() - ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other) - : DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()}; + // Indexed loads and stores are not supported on LoongArch. + assert(LN->isUnindexed() && "Unexpected indexed load."); + + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + // The offset operand of unindexed load is always undefined, so there is + // no need to pass it to VLDREPL. 
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1)); return BCast; @@ -4447,7 +4574,7 @@ SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, // Returns the opcode of the target-specific SDNode that implements the 32-bit // form of the given Opcode. -static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { +static unsigned getLoongArchWOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); @@ -4483,7 +4610,7 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc = ISD::ANY_EXTEND) { SDLoc DL(N); - LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); + unsigned WOpcode = getLoongArchWOpcode(N->getOpcode()); SDValue NewOp0, NewRes; switch (NumOp) { @@ -5042,6 +5169,33 @@ void LoongArchTargetLowering::ReplaceNodeResults( } } +/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y). +static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { + assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN"); + + MVT VT = N->getSimpleValueType(0); + if (!VT.is128BitVector() && !VT.is256BitVector()) + return SDValue(); + + SDValue X, Y; + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + if (SDValue Not = isNOT(N0, DAG)) { + X = Not; + Y = N1; + } else if (SDValue Not = isNOT(N1, DAG)) { + X = Not; + Y = N0; + } else + return SDValue(); + + X = DAG.getBitcast(VT, X); + Y = DAG.getBitcast(VT, Y); + return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y); +} + static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget) { @@ -5059,6 +5213,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, SDValue NewOperand; MVT GRLenVT = Subtarget.getGRLenVT(); + if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG)) + return R; + // BSTRPICK requires the 32S feature. if (!Subtarget.has32S()) return SDValue(); @@ -6618,6 +6775,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0), N->getOperand(1)); break; + case Intrinsic::loongarch_lasx_concat_128_s: + case Intrinsic::loongarch_lasx_concat_128_d: + case Intrinsic::loongarch_lasx_concat_128: + return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), + N->getOperand(1), N->getOperand(2)); } return SDValue(); } @@ -6731,6 +6893,69 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Do target-specific dag combines on LoongArchISD::VANDN nodes. 
+static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + MVT VT = N->getSimpleValueType(0); + SDLoc DL(N); + + // VANDN(undef, x) -> 0 + // VANDN(x, undef) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, DL, VT); + + // VANDN(0, x) -> x + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + + // VANDN(x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); + + // VANDN(x, -1) -> NOT(x) -> XOR(x, -1) + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return DAG.getNOT(DL, N0, VT); + + // Turn VANDN back to AND if input is inverted. + if (SDValue Not = isNOT(N0, DAG)) + return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1); + + // Folds for better commutativity: + if (N1->hasOneUse()) { + // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)). + if (SDValue Not = isNOT(N1, DAG)) + return DAG.getNOT( + DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT); + + // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm))) + // -> NOT(OR(x, SplatVector(-Imm)) + // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to + // gain benefits. + if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) && + N1.getOpcode() == ISD::BUILD_VECTOR) { + if (SDValue SplatValue = + cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) { + if (!N1->isOnlyUserOf(SplatValue.getNode())) + return SDValue(); + + if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) { + uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue())); + SDValue Not = + DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8)); + return DAG.getNOT( + DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), + VT); + } + } + } + } + + return SDValue(); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -6766,6 +6991,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget); case ISD::EXTRACT_VECTOR_ELT: return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget); + case LoongArchISD::VANDN: + return performVANDNCombine(N, DAG, DCI, Subtarget); } return SDValue(); } @@ -7466,123 +7693,6 @@ bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( return true; } -const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((LoongArchISD::NodeType)Opcode) { - case LoongArchISD::FIRST_NUMBER: - break; - -#define NODE_NAME_CASE(node) \ - case LoongArchISD::node: \ - return "LoongArchISD::" #node; - - // TODO: Add more target-dependent nodes later. 
- NODE_NAME_CASE(CALL) - NODE_NAME_CASE(CALL_MEDIUM) - NODE_NAME_CASE(CALL_LARGE) - NODE_NAME_CASE(RET) - NODE_NAME_CASE(TAIL) - NODE_NAME_CASE(TAIL_MEDIUM) - NODE_NAME_CASE(TAIL_LARGE) - NODE_NAME_CASE(SELECT_CC) - NODE_NAME_CASE(BR_CC) - NODE_NAME_CASE(BRCOND) - NODE_NAME_CASE(SLL_W) - NODE_NAME_CASE(SRA_W) - NODE_NAME_CASE(SRL_W) - NODE_NAME_CASE(BSTRINS) - NODE_NAME_CASE(BSTRPICK) - NODE_NAME_CASE(MOVGR2FR_W) - NODE_NAME_CASE(MOVGR2FR_W_LA64) - NODE_NAME_CASE(MOVGR2FR_D) - NODE_NAME_CASE(MOVGR2FR_D_LO_HI) - NODE_NAME_CASE(MOVFR2GR_S_LA64) - NODE_NAME_CASE(FTINT) - NODE_NAME_CASE(BUILD_PAIR_F64) - NODE_NAME_CASE(SPLIT_PAIR_F64) - NODE_NAME_CASE(REVB_2H) - NODE_NAME_CASE(REVB_2W) - NODE_NAME_CASE(BITREV_4B) - NODE_NAME_CASE(BITREV_8B) - NODE_NAME_CASE(BITREV_W) - NODE_NAME_CASE(ROTR_W) - NODE_NAME_CASE(ROTL_W) - NODE_NAME_CASE(DIV_W) - NODE_NAME_CASE(DIV_WU) - NODE_NAME_CASE(MOD_W) - NODE_NAME_CASE(MOD_WU) - NODE_NAME_CASE(CLZ_W) - NODE_NAME_CASE(CTZ_W) - NODE_NAME_CASE(DBAR) - NODE_NAME_CASE(IBAR) - NODE_NAME_CASE(BREAK) - NODE_NAME_CASE(SYSCALL) - NODE_NAME_CASE(CRC_W_B_W) - NODE_NAME_CASE(CRC_W_H_W) - NODE_NAME_CASE(CRC_W_W_W) - NODE_NAME_CASE(CRC_W_D_W) - NODE_NAME_CASE(CRCC_W_B_W) - NODE_NAME_CASE(CRCC_W_H_W) - NODE_NAME_CASE(CRCC_W_W_W) - NODE_NAME_CASE(CRCC_W_D_W) - NODE_NAME_CASE(CSRRD) - NODE_NAME_CASE(CSRWR) - NODE_NAME_CASE(CSRXCHG) - NODE_NAME_CASE(IOCSRRD_B) - NODE_NAME_CASE(IOCSRRD_H) - NODE_NAME_CASE(IOCSRRD_W) - NODE_NAME_CASE(IOCSRRD_D) - NODE_NAME_CASE(IOCSRWR_B) - NODE_NAME_CASE(IOCSRWR_H) - NODE_NAME_CASE(IOCSRWR_W) - NODE_NAME_CASE(IOCSRWR_D) - NODE_NAME_CASE(CPUCFG) - NODE_NAME_CASE(MOVGR2FCSR) - NODE_NAME_CASE(MOVFCSR2GR) - NODE_NAME_CASE(CACOP_D) - NODE_NAME_CASE(CACOP_W) - NODE_NAME_CASE(VSHUF) - NODE_NAME_CASE(VPICKEV) - NODE_NAME_CASE(VPICKOD) - NODE_NAME_CASE(VPACKEV) - NODE_NAME_CASE(VPACKOD) - NODE_NAME_CASE(VILVL) - NODE_NAME_CASE(VILVH) - NODE_NAME_CASE(VSHUF4I) - NODE_NAME_CASE(VREPLVEI) - NODE_NAME_CASE(VREPLGR2VR) - NODE_NAME_CASE(XVPERMI) - NODE_NAME_CASE(XVPERM) - NODE_NAME_CASE(XVREPLVE0) - NODE_NAME_CASE(XVREPLVE0Q) - NODE_NAME_CASE(XVINSVE0) - NODE_NAME_CASE(VPICK_SEXT_ELT) - NODE_NAME_CASE(VPICK_ZEXT_ELT) - NODE_NAME_CASE(VREPLVE) - NODE_NAME_CASE(VALL_ZERO) - NODE_NAME_CASE(VANY_ZERO) - NODE_NAME_CASE(VALL_NONZERO) - NODE_NAME_CASE(VANY_NONZERO) - NODE_NAME_CASE(FRECIPE) - NODE_NAME_CASE(FRSQRTE) - NODE_NAME_CASE(VSLLI) - NODE_NAME_CASE(VSRLI) - NODE_NAME_CASE(VBSLL) - NODE_NAME_CASE(VBSRL) - NODE_NAME_CASE(VLDREPL) - NODE_NAME_CASE(VMSKLTZ) - NODE_NAME_CASE(VMSKGEZ) - NODE_NAME_CASE(VMSKEQZ) - NODE_NAME_CASE(VMSKNEZ) - NODE_NAME_CASE(XVMSKLTZ) - NODE_NAME_CASE(XVMSKGEZ) - NODE_NAME_CASE(XVMSKEQZ) - NODE_NAME_CASE(XVMSKNEZ) - NODE_NAME_CASE(VHADDW) - } -#undef NODE_NAME_CASE - return nullptr; -} - //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -8802,7 +8912,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { } bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 8a4d774..5277e7e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -21,179 +21,6 @@ namespace llvm { class 
LoongArchSubtarget; -namespace LoongArchISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // TODO: add more LoongArchISDs - CALL, - CALL_MEDIUM, - CALL_LARGE, - RET, - TAIL, - TAIL_MEDIUM, - TAIL_LARGE, - - // Select - SELECT_CC, - - // Branch - BR_CC, - BRCOND, - - // 32-bit shifts, directly matching the semantics of the named LoongArch - // instructions. - SLL_W, - SRA_W, - SRL_W, - - ROTL_W, - ROTR_W, - - // unsigned 32-bit integer division - DIV_W, - MOD_W, - DIV_WU, - MOD_WU, - - // FPR<->GPR transfer operations - MOVGR2FR_W, - MOVGR2FR_W_LA64, - MOVGR2FR_D, - MOVGR2FR_D_LO_HI, - MOVFR2GR_S_LA64, - MOVFCSR2GR, - MOVGR2FCSR, - - FTINT, - - // Build and split F64 pair - BUILD_PAIR_F64, - SPLIT_PAIR_F64, - - // Bit counting operations - CLZ_W, - CTZ_W, - - BSTRINS, - BSTRPICK, - - // Byte-swapping and bit-reversal - REVB_2H, - REVB_2W, - BITREV_4B, - BITREV_8B, - BITREV_W, - - // Intrinsic operations start ============================================ - BREAK, - CACOP_D, - CACOP_W, - DBAR, - IBAR, - SYSCALL, - - // CRC check operations - CRC_W_B_W, - CRC_W_H_W, - CRC_W_W_W, - CRC_W_D_W, - CRCC_W_B_W, - CRCC_W_H_W, - CRCC_W_W_W, - CRCC_W_D_W, - - CSRRD, - - // Write new value to CSR and return old value. - // Operand 0: A chain pointer. - // Operand 1: The new value to write. - // Operand 2: The address of the required CSR. - // Result 0: The old value of the CSR. - // Result 1: The new chain pointer. - CSRWR, - - // Similar to CSRWR but with a write mask. - // Operand 0: A chain pointer. - // Operand 1: The new value to write. - // Operand 2: The write mask. - // Operand 3: The address of the required CSR. - // Result 0: The old value of the CSR. - // Result 1: The new chain pointer. - CSRXCHG, - - // IOCSR access operations - IOCSRRD_B, - IOCSRRD_W, - IOCSRRD_H, - IOCSRRD_D, - IOCSRWR_B, - IOCSRWR_H, - IOCSRWR_W, - IOCSRWR_D, - - // Read CPU configuration information operation - CPUCFG, - - // Vector Shuffle - VREPLVE, - VSHUF, - VPICKEV, - VPICKOD, - VPACKEV, - VPACKOD, - VILVL, - VILVH, - VSHUF4I, - VREPLVEI, - VREPLGR2VR, - XVPERMI, - XVPERM, - XVREPLVE0, - XVREPLVE0Q, - XVINSVE0, - - // Extended vector element extraction - VPICK_SEXT_ELT, - VPICK_ZEXT_ELT, - - // Vector comparisons - VALL_ZERO, - VANY_ZERO, - VALL_NONZERO, - VANY_NONZERO, - - // Floating point approximate reciprocal operation - FRECIPE, - FRSQRTE, - - // Vector logicial left / right shift by immediate - VSLLI, - VSRLI, - - // Vector byte logicial left / right shift - VBSLL, - VBSRL, - - // Scalar load broadcast to vector - VLDREPL, - - // Vector mask set by condition - VMSKLTZ, - VMSKGEZ, - VMSKEQZ, - VMSKNEZ, - XVMSKLTZ, - XVMSKGEZ, - XVMSKEQZ, - XVMSKNEZ, - - // Vector Horizontal Addition with Widening‌ - VHADDW - - // Intrinsic operations end ============================================= -}; -} // end namespace LoongArchISD class LoongArchTargetLowering : public TargetLowering { const LoongArchSubtarget &Subtarget; @@ -213,9 +40,6 @@ public: SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - // This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - // Lower incoming arguments, copy physregs into vregs. 
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -254,7 +78,7 @@ public: Value *NewVal, Value *Mask, AtomicOrdering Ord) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; @@ -415,6 +239,7 @@ private: SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index c89212d..9fc862a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -26,9 +26,9 @@ using namespace llvm; #include "LoongArchGenInstrInfo.inc" LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI) - : LoongArchGenInstrInfo(STI, LoongArch::ADJCALLSTACKDOWN, + : LoongArchGenInstrInfo(STI, RegInfo, LoongArch::ADJCALLSTACKDOWN, LoongArch::ADJCALLSTACKUP), - STI(STI) {} + RegInfo(STI.getHwMode()), STI(STI) {} MCInst LoongArchInstrInfo::getNop() const { return MCInstBuilder(LoongArch::ANDI) @@ -113,14 +113,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, void LoongArchInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, - MachineInstr::MIFlag Flags) const { + + Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction *MF = MBB.getParent(); MachineFrameInfo &MFI = MF->getFrameInfo(); unsigned Opcode; if (LoongArch::GPRRegClass.hasSubClassEq(RC)) - Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 + Opcode = TRI.getRegSizeInBits(LoongArch::GPRRegClass) == 32 ? LoongArch::ST_W : LoongArch::ST_D; else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) @@ -149,8 +149,8 @@ void LoongArchInstrInfo::storeRegToStackSlot( void LoongArchInstrInfo::loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { + int FI, const TargetRegisterClass *RC, Register VReg, + MachineInstr::MIFlag Flags) const { MachineFunction *MF = MBB.getParent(); MachineFrameInfo &MFI = MF->getFrameInfo(); DebugLoc DL; @@ -159,7 +159,7 @@ void LoongArchInstrInfo::loadRegFromStackSlot( unsigned Opcode; if (LoongArch::GPRRegClass.hasSubClassEq(RC)) - Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 + Opcode = RegInfo.getRegSizeInBits(LoongArch::GPRRegClass) == 32 ? 
LoongArch::LD_W : LoongArch::LD_D; else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) @@ -378,12 +378,9 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp, } } -bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) - return true; - +bool LoongArchInstrInfo::isSafeToMove(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { auto MII = MI.getIterator(); auto MIE = MBB->end(); @@ -429,25 +426,25 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO2 = Lu32I->getOperand(2).getTargetFlags(); if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO && MO2 == LoongArchII::MO_PCREL64_LO) - return true; + return false; if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI || MO0 == LoongArchII::MO_GD_PC_HI) && MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO) - return true; + return false; if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO && MO2 == LoongArchII::MO_IE_PC64_LO) - return true; + return false; if (MO0 == LoongArchII::MO_DESC_PC_HI && MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC64_PC_LO) - return true; + return false; break; } case LoongArch::LU52I_D: { auto MO = MI.getOperand(2).getTargetFlags(); if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI || MO == LoongArchII::MO_IE_PC64_HI || MO == LoongArchII::MO_DESC64_PC_HI) - return true; + return false; break; } default: @@ -487,7 +484,7 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2)); auto MO2 = LoongArchII::getDirectFlags(Ld->getOperand(2)); if (MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC_LD) - return true; + return false; break; } if (SecondOp == MIE || @@ -496,34 +493,34 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2)); if (MO0 == LoongArchII::MO_PCREL_HI && SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_PCREL_LO) - return true; + return false; if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp && MO1 == LoongArchII::MO_GOT_PC_LO) - return true; + return false; if ((MO0 == LoongArchII::MO_LD_PC_HI || MO0 == LoongArchII::MO_GD_PC_HI) && SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::ADDI_W: case LoongArch::ADDI_D: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_PCREL_LO || MO == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::LD_W: case LoongArch::LD_D: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_GOT_PC_LO) - return true; + return false; break; } case LoongArch::PseudoDESC_CALL: { auto MO = LoongArchII::getDirectFlags(MI.getOperand(2)); if (MO == LoongArchII::MO_DESC_CALL) - return true; + return false; break; } default: @@ -531,6 +528,18 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, } } + return true; +} + +bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) + return true; + + if (!isSafeToMove(MI, MBB, MF)) + 
return true; + return false; } @@ -656,13 +665,13 @@ void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, if (FrameIndex == -1) report_fatal_error("The function size is incorrectly estimated."); storeRegToStackSlot(MBB, PCALAU12I, Scav, /*IsKill=*/true, FrameIndex, - &LoongArch::GPRRegClass, TRI, Register()); + &LoongArch::GPRRegClass, Register()); TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()), /*SpAdj=*/0, /*FIOperandNum=*/1); PCALAU12I.getOperand(1).setMBB(&RestoreBB); ADDI.getOperand(2).setMBB(&RestoreBB); loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex, - &LoongArch::GPRRegClass, TRI, Register()); + &LoongArch::GPRRegClass, Register()); TRI->eliminateFrameIndex(RestoreBB.back(), /*SpAdj=*/0, /*FIOperandNum=*/1); } @@ -756,6 +765,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { return ArrayRef(TargetFlags); } +bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, + Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const { + enum MemIOffsetType { + Imm14Shift2, + Imm12, + Imm11Shift1, + Imm10Shift2, + Imm9Shift3, + Imm8, + Imm8Shift1, + Imm8Shift2, + Imm8Shift3 + }; + + MemIOffsetType OT; + switch (MemI.getOpcode()) { + default: + return false; + case LoongArch::LDPTR_W: + case LoongArch::LDPTR_D: + case LoongArch::STPTR_W: + case LoongArch::STPTR_D: + OT = Imm14Shift2; + break; + case LoongArch::LD_B: + case LoongArch::LD_H: + case LoongArch::LD_W: + case LoongArch::LD_D: + case LoongArch::LD_BU: + case LoongArch::LD_HU: + case LoongArch::LD_WU: + case LoongArch::ST_B: + case LoongArch::ST_H: + case LoongArch::ST_W: + case LoongArch::ST_D: + case LoongArch::FLD_S: + case LoongArch::FLD_D: + case LoongArch::FST_S: + case LoongArch::FST_D: + case LoongArch::VLD: + case LoongArch::VST: + case LoongArch::XVLD: + case LoongArch::XVST: + case LoongArch::VLDREPL_B: + case LoongArch::XVLDREPL_B: + OT = Imm12; + break; + case LoongArch::VLDREPL_H: + case LoongArch::XVLDREPL_H: + OT = Imm11Shift1; + break; + case LoongArch::VLDREPL_W: + case LoongArch::XVLDREPL_W: + OT = Imm10Shift2; + break; + case LoongArch::VLDREPL_D: + case LoongArch::XVLDREPL_D: + OT = Imm9Shift3; + break; + case LoongArch::VSTELM_B: + case LoongArch::XVSTELM_B: + OT = Imm8; + break; + case LoongArch::VSTELM_H: + case LoongArch::XVSTELM_H: + OT = Imm8Shift1; + break; + case LoongArch::VSTELM_W: + case LoongArch::XVSTELM_W: + OT = Imm8Shift2; + break; + case LoongArch::VSTELM_D: + case LoongArch::XVSTELM_D: + OT = Imm8Shift3; + break; + } + + if (MemI.getOperand(0).getReg() == Reg) + return false; + + if ((AddrI.getOpcode() != LoongArch::ADDI_W && + AddrI.getOpcode() != LoongArch::ADDI_D) || + !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm()) + return false; + + int64_t OldOffset = MemI.getOperand(2).getImm(); + int64_t Disp = AddrI.getOperand(2).getImm(); + int64_t NewOffset = OldOffset + Disp; + if (!STI.is64Bit()) + NewOffset = SignExtend64<32>(NewOffset); + + if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) && + !(OT == Imm12 && isInt<12>(NewOffset)) && + !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) && + !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) && + !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) && + !(OT == Imm8 && isInt<8>(NewOffset)) && + !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) && + !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) && + !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset))) + return false; + + AM.BaseReg = 
AddrI.getOperand(1).getReg(); + AM.ScaledReg = 0; + AM.Scale = 0; + AM.Displacement = NewOffset; + AM.Form = ExtAddrMode::Formula::Basic; + return true; +} + +MachineInstr * +LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const { + const DebugLoc &DL = MemI.getDebugLoc(); + MachineBasicBlock &MBB = *MemI.getParent(); + + assert(AM.ScaledReg == 0 && AM.Scale == 0 && + "Addressing mode not supported for folding"); + + unsigned MemIOp = MemI.getOpcode(); + switch (MemIOp) { + default: + return BuildMI(MBB, MemI, DL, get(MemIOp)) + .addReg(MemI.getOperand(0).getReg(), + MemI.mayLoad() ? RegState::Define : 0) + .addReg(AM.BaseReg) + .addImm(AM.Displacement) + .setMemRefs(MemI.memoperands()) + .setMIFlags(MemI.getFlags()); + case LoongArch::VSTELM_B: + case LoongArch::VSTELM_H: + case LoongArch::VSTELM_W: + case LoongArch::VSTELM_D: + case LoongArch::XVSTELM_B: + case LoongArch::XVSTELM_H: + case LoongArch::XVSTELM_W: + case LoongArch::XVSTELM_D: + return BuildMI(MBB, MemI, DL, get(MemIOp)) + .addReg(MemI.getOperand(0).getReg(), 0) + .addReg(AM.BaseReg) + .addImm(AM.Displacement) + .addImm(MemI.getOperand(3).getImm()) + .setMemRefs(MemI.memoperands()) + .setMIFlags(MemI.getFlags()); + } +} + // Returns true if this is the sext.w pattern, addi.w rd, rs, 0. bool LoongArch::isSEXT_W(const MachineInstr &MI) { return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() && diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index f25958a..9f7a0a2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -24,9 +24,13 @@ namespace llvm { class LoongArchSubtarget; class LoongArchInstrInfo : public LoongArchGenInstrInfo { + const LoongArchRegisterInfo RegInfo; + public: explicit LoongArchInstrInfo(const LoongArchSubtarget &STI); + const LoongArchRegisterInfo &getRegisterInfo() const { return RegInfo; } + MCInst getNop() const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -36,13 +40,11 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool IsKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool IsKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DstReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; // Materializes the given integer Val into DstReg. 
@@ -64,6 +66,9 @@ public: bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; + bool isSafeToMove(const MachineInstr &MI, const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; @@ -93,6 +98,12 @@ public: ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, + const MachineInstr &AddrI, + ExtAddrMode &AM) const override; + MachineInstr *emitLdStWithAddr(MachineInstr &MemI, + const ExtAddrMode &AM) const override; + protected: const LoongArchSubtarget &STI; }; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 9565a55..2e6653e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -100,14 +100,22 @@ def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall, def loongarch_selectcc : SDNode<"LoongArchISD::SELECT_CC", SDT_LoongArchSelectCC>; def loongarch_brcc : SDNode<"LoongArchISD::BR_CC", SDT_LoongArchBrCC, [SDNPHasChain]>; + +// 32-bit shifts, directly matching the semantics of the named LoongArch +// instructions. def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; + def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; + +// unsigned 32-bit integer division def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>; def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>; def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>; def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>; + +// CRC check operations def loongarch_crc_w_b_w : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; def loongarch_crc_w_h_w @@ -124,37 +132,63 @@ def loongarch_crcc_w_w_w : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; def loongarch_crcc_w_d_w : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_bstrins : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>; def loongarch_bstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>; + +// Byte-swapping and bit-reversal def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>; def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>; def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>; def loongarch_bitrev_8b : SDNode<"LoongArchISD::BITREV_8B", SDTUnaryOp>; def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>; + +// Bit counting operations def loongarch_clzw : SDNode<"LoongArchISD::CLZ_W", SDTIntBitCountUnaryOp>; def loongarch_ctzw : SDNode<"LoongArchISD::CTZ_W", SDTIntBitCountUnaryOp>; + def loongarch_dbar : SDNode<"LoongArchISD::DBAR", SDT_LoongArchVI, [SDNPHasChain, SDNPSideEffect]>; def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI, [SDNPHasChain, SDNPSideEffect]>; def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI, [SDNPHasChain, SDNPSideEffect]>; + +// FPR<->GPR transfer operations def loongarch_movfcsr2gr : 
SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr, [SDNPHasChain]>; def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR", SDT_LoongArchMovgr2fcsr, [SDNPHasChain, SDNPSideEffect]>; + def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI, [SDNPHasChain, SDNPSideEffect]>; def loongarch_csrrd : SDNode<"LoongArchISD::CSRRD", SDT_LoongArchCsrrd, [SDNPHasChain, SDNPSideEffect]>; + +// Write new value to CSR and return old value. +// Operand 0: A chain pointer. +// Operand 1: The new value to write. +// Operand 2: The address of the required CSR. +// Result 0: The old value of the CSR. +// Result 1: The new chain pointer. def loongarch_csrwr : SDNode<"LoongArchISD::CSRWR", SDT_LoongArchCsrwr, [SDNPHasChain, SDNPSideEffect]>; + +// Similar to CSRWR but with a write mask. +// Operand 0: A chain pointer. +// Operand 1: The new value to write. +// Operand 2: The write mask. +// Operand 3: The address of the required CSR. +// Result 0: The old value of the CSR. +// Result 1: The new chain pointer. def loongarch_csrxchg : SDNode<"LoongArchISD::CSRXCHG", SDT_LoongArchCsrxchg, [SDNPHasChain, SDNPSideEffect]>; + +// IOCSR access operations def loongarch_iocsrrd_b : SDNode<"LoongArchISD::IOCSRRD_B", SDTUnaryOp, [SDNPHasChain, SDNPSideEffect]>; def loongarch_iocsrrd_h : SDNode<"LoongArchISD::IOCSRRD_H", SDTUnaryOp, @@ -175,9 +209,12 @@ def loongarch_iocsrwr_w : SDNode<"LoongArchISD::IOCSRWR_W", def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D", SDT_LoongArchIocsrwr, [SDNPHasChain, SDNPSideEffect]>; + +// Read CPU configuration information operation def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp, [SDNPHasChain]>; +// Build and split F64 pair def loongarch_build_pair_f64 : SDNode<"LoongArchISD::BUILD_PAIR_F64", SDT_LoongArchBuildPairF64>; def loongarch_split_pair_f64 : SDNode<"LoongArchISD::SPLIT_PAIR_F64", diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ca4ee5f..d6af093 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -16,11 +16,15 @@ def SDT_LoongArchXVREPLVE0 : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; // Target nodes. 
+ +// Vector Shuffle def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>; def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>; def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>; def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>; def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>; + +// Vector mask set by condition def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>; def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>; def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>; @@ -1396,7 +1400,7 @@ def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))), (XVNOR_V LASX256:$xj, LASX256:$xk)>; // XVANDN_V foreach vt = [v32i8, v16i16, v8i32, v4i64] in -def : Pat<(and (vt (vnot LASX256:$xj)), (vt LASX256:$xk)), +def : Pat<(loongarch_vandn (vt LASX256:$xj), (vt LASX256:$xk)), (XVANDN_V LASX256:$xj, LASX256:$xk)>; // XVORN_V foreach vt = [v32i8, v16i16, v8i32, v4i64] in @@ -1443,6 +1447,11 @@ defm : PatXrXr<sra, "XVSRA">; defm : PatShiftXrXr<sra, "XVSRA">; defm : PatShiftXrSplatUimm<sra, "XVSRAI">; +// XVROTR[I]_{B/H/W/D} +defm : PatXrXr<rotr, "XVROTR">; +defm : PatShiftXrXr<rotr, "XVROTR">; +defm : PatShiftXrSplatUimm<rotr, "XVROTRI">; + // XVCLZ_{B/H/W/D} defm : PatXr<ctlz, "XVCLZ">; @@ -1450,25 +1459,25 @@ defm : PatXr<ctlz, "XVCLZ">; defm : PatXr<ctpop, "XVPCNT">; // XVBITCLR_{B/H/W/D} -def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))), +def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1, v32i8:$xk)), v32i8:$xj), (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; -def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))), +def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1, v16i16:$xk)), v16i16:$xj), (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; -def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))), +def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1, v8i32:$xk)), v8i32:$xj), (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; -def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))), +def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1, v4i64:$xk)), v4i64:$xj), (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; -def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, - (vsplati8imm7 v32i8:$xk)))), +def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1, + (vsplati8imm7 v32i8:$xk))), v32i8:$xj), (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>; -def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, - (vsplati16imm15 v16i16:$xk)))), +def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1, + (vsplati16imm15 v16i16:$xk))), v16i16:$xj), (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>; -def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, - (vsplati32imm31 v8i32:$xk)))), +def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1, + (vsplati32imm31 v8i32:$xk))), v8i32:$xj), (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>; -def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, - (vsplati64imm63 v4i64:$xk)))), +def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1, + (vsplati64imm63 v4i64:$xk))), v4i64:$xj), (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>; // XVBITCLRI_{B/H/W/D} @@ -1558,6 +1567,10 @@ defm : PatXrXrF<fmul, "XVFMUL">; // XVFDIV_{S/D} defm : PatXrXrF<fdiv, "XVFDIV">; +// XVFMAX_{S/D}, XVFMIN_{S/D} +defm : PatXrXrF<fmaxnum, "XVFMAX">; +defm : PatXrXrF<fminnum, "XVFMIN">; + // XVFMADD_{S/D} def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), 
(XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; @@ -2109,6 +2122,37 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>; defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>; defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>; +// LASX and LSX conversion +def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; +def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; +def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; } // Predicates = [HasExtLASX] /// Intrinsic pattern @@ -2424,6 +2468,12 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; +// Vector floating-point conversion +defm : PatXrF<fceil, "XVFRINTRP">; +defm : PatXrF<ffloor, "XVFRINTRM">; +defm : PatXrF<ftrunc, "XVFRINTRZ">; +defm : PatXrF<froundeven, "XVFRINTRNE">; + // load def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 92402ba..43ad381 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -34,7 +34,11 @@ def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>; def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; // Target nodes. 
+ +// Vector Shuffle def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; + +// Vector comparisons def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO", SDT_LoongArchVecCond>; def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO", @@ -44,11 +48,13 @@ def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO", def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO", SDT_LoongArchVecCond>; +// Extended vector element extraction def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>; +// Vector Shuffle def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>; def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>; def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>; @@ -56,27 +62,33 @@ def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>; def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>; def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>; def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>; +def loongarch_vandn: SDNode<"LoongArchISD::VANDN", SDT_LoongArchV2R>; def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>; -def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV2RUimm>; +def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I_D", SDT_LoongArchV2RUimm>; def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>; def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>; def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>; def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>; +// Vector logicial left / right shift by immediate def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>; def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>; +// Vector byte logicial left / right shift def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>; def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>; +// Vector Horizontal Addition with Widening def loongarch_vhaddw : SDNode<"LoongArchISD::VHADDW", SDT_LoongArchV2R>; +// Scalar load broadcast to vector def loongarch_vldrepl : SDNode<"LoongArchISD::VLDREPL", SDT_LoongArchVLDREPL, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +// Vector mask set by condition def loongarch_vmskltz: SDNode<"LoongArchISD::VMSKLTZ", SDT_LoongArchVMSKCOND>; def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>; def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>; @@ -1598,7 +1610,7 @@ def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))), (VNOR_V LSX128:$vj, LSX128:$vk)>; // VANDN_V foreach vt = [v16i8, v8i16, v4i32, v2i64] in -def : Pat<(and (vt (vnot LSX128:$vj)), (vt LSX128:$vk)), +def : Pat<(loongarch_vandn (vt LSX128:$vj), (vt LSX128:$vk)), (VANDN_V LSX128:$vj, LSX128:$vk)>; // VORN_V foreach vt = [v16i8, v8i16, v4i32, v2i64] in @@ -1645,6 +1657,11 @@ defm : PatVrVr<sra, "VSRA">; defm : PatShiftVrVr<sra, "VSRA">; defm : PatShiftVrSplatUimm<sra, "VSRAI">; +// VROTR[I]_{B/H/W/D} +defm : PatVrVr<rotr, "VROTR">; +defm : PatShiftVrVr<rotr, "VROTR">; +defm : PatShiftVrSplatUimm<rotr, "VROTRI">; + // VCLZ_{B/H/W/D} defm : PatVr<ctlz, "VCLZ">; @@ -1652,25 +1669,25 @@ 
defm : PatVr<ctlz, "VCLZ">; defm : PatVr<ctpop, "VPCNT">; // VBITCLR_{B/H/W/D} -def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))), +def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj), (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))), +def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj), (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))), +def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj), (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))), +def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj), (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; -def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, - (vsplati8imm7 v16i8:$vk)))), +def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, + (vsplati8imm7 v16i8:$vk))), v16i8:$vj), (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; -def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, - (vsplati16imm15 v8i16:$vk)))), +def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, + (vsplati16imm15 v8i16:$vk))), v8i16:$vj), (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; -def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, - (vsplati32imm31 v4i32:$vk)))), +def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, + (vsplati32imm31 v4i32:$vk))), v4i32:$vj), (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; -def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, - (vsplati64imm63 v2i64:$vk)))), +def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, + (vsplati64imm63 v2i64:$vk))), v2i64:$vj), (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; // VBITCLRI_{B/H/W/D} @@ -1760,6 +1777,10 @@ defm : PatVrVrF<fmul, "VFMUL">; // VFDIV_{S/D} defm : PatVrVrF<fdiv, "VFDIV">; +// VFMAX_{S/D}, VFMIN_{S/D} +defm : PatVrVrF<fmaxnum, "VFMAX">; +defm : PatVrVrF<fminnum, "VFMIN">; + // VFMADD_{S/D} def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; @@ -2552,6 +2573,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)), (f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>; +defm : PatVrF<fceil, "VFRINTRP">; +defm : PatVrF<ffloor, "VFRINTRM">; +defm : PatVrF<ftrunc, "VFRINTRZ">; +defm : PatVrF<froundeven, "VFRINTRNE">; + // load def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), (VLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp new file mode 100644 index 0000000..c07adfc --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
new file mode 100644
index 0000000..c07adfc
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchSelectionDAGInfo.h"
+
+#define GET_SDNODE_DESC
+#include "LoongArchGenSDNodeInfo.inc"
+
+using namespace llvm;
+
+LoongArchSelectionDAGInfo::LoongArchSelectionDAGInfo()
+    : SelectionDAGGenTargetInfo(LoongArchGenSDNodeInfo) {}
+
+LoongArchSelectionDAGInfo::~LoongArchSelectionDAGInfo() = default;
diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
new file mode 100644
index 0000000..7210a15
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+#define GET_SDNODE_ENUM
+#include "LoongArchGenSDNodeInfo.inc"
+
+namespace llvm {
+
+class LoongArchSelectionDAGInfo : public SelectionDAGGenTargetInfo {
+public:
+  LoongArchSelectionDAGInfo();
+
+  ~LoongArchSelectionDAGInfo() override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
index 3acbe49..6293cbe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -12,6 +12,7 @@
 #include "LoongArchSubtarget.h"
 #include "LoongArchFrameLowering.h"
+#include "LoongArchSelectionDAGInfo.h"
 #include "MCTargetDesc/LoongArchBaseInfo.h"
 
 using namespace llvm;
 
@@ -95,4 +96,12 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
     : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
       FrameLowering(
          initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
-      InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {}
+      InstrInfo(*this), TLInfo(TM, *this) {
+  TSInfo = std::make_unique<LoongArchSelectionDAGInfo>();
+}
+
+LoongArchSubtarget::~LoongArchSubtarget() = default;
+
+const SelectionDAGTargetInfo *LoongArchSubtarget::getSelectionDAGInfo() const {
+  return TSInfo.get();
+}
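The subtarget now owns its SelectionDAGInfo behind a std::unique_ptr and defines the destructor out of line, which is what lets the header below drop its SelectionDAGTargetInfo.h include and keep only a forward-declared type. A minimal standalone sketch of that general C++ pattern, with hypothetical Owner/Widget names rather than the LLVM types:

#include <memory>

class Widget;  // an incomplete type is enough for the header

class Owner {
public:
  Owner();
  ~Owner();  // must be defined where Widget is complete
  const Widget *widget() const { return W.get(); }

private:
  std::unique_ptr<const Widget> W;
};

// In the .cpp file the full type is visible, so constructing and
// destroying the unique_ptr is well-formed here.
class Widget {};

Owner::Owner() : W(std::make_unique<const Widget>()) {}
Owner::~Owner() = default;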
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 5e12baf..b90542c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -18,7 +18,6 @@
 #include "LoongArchInstrInfo.h"
 #include "LoongArchRegisterInfo.h"
 #include "MCTargetDesc/LoongArchBaseInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
@@ -45,9 +44,8 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
   LoongArchFrameLowering FrameLowering;
   LoongArchInstrInfo InstrInfo;
-  LoongArchRegisterInfo RegInfo;
   LoongArchTargetLowering TLInfo;
-  SelectionDAGTargetInfo TSInfo;
+  std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
   Align PrefFunctionAlignment;
   Align PrefLoopAlignment;
 
@@ -69,6 +67,8 @@ public:
   LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                      StringRef FS, StringRef ABIName, const TargetMachine &TM);
 
+  ~LoongArchSubtarget() override;
+
   // Parses features string setting specified subtarget options. The
   // definition of this function is auto-generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
@@ -78,14 +78,13 @@ public:
   }
   const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; }
   const LoongArchRegisterInfo *getRegisterInfo() const override {
-    return &RegInfo;
+    return &InstrInfo.getRegisterInfo();
   }
   const LoongArchTargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
-  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
-    return &TSInfo;
-  }
+
+  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
 
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
   bool GETTER() const { return ATTRIBUTE; }
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 9de4c9d..92a9388 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool>
                           cl::desc("Enable the merge base offset pass"),
                           cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+    EnableSinkFold("loongarch-enable-sink-fold",
+                   cl::desc("Enable sinking and folding of instruction copies"),
+                   cl::init(true), cl::Hidden);
+
 static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
   return RM.value_or(Reloc::Static);
 }
@@ -146,7 +151,9 @@ namespace {
 class LoongArchPassConfig : public TargetPassConfig {
 public:
   LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
-      : TargetPassConfig(TM, PM) {}
+      : TargetPassConfig(TM, PM) {
+    setEnableSinkAndFold(EnableSinkFold);
+  }
 
   LoongArchTargetMachine &getLoongArchTargetMachine() const {
     return getTM<LoongArchTargetMachine>();
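The sink-and-fold switch above follows LLVM's stock cl::opt idiom: a hidden, default-on boolean that the pass configuration reads once at construction time. A small sketch of that same idiom, using only the llvm::cl API already visible in the hunk; the -demo-enable-feature flag is invented for illustration:

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Hidden, default-on switch, mirroring the shape of
// -loongarch-enable-sink-fold above; the name here is made up.
static cl::opt<bool>
    EnableDemoFeature("demo-enable-feature",
                      cl::desc("Enable the demo feature"),
                      cl::init(true), cl::Hidden);

bool isDemoFeatureEnabled() { return EnableDemoFeature; }

Since hidden options are still parsed, the real flag should be controllable on an llc invocation (for example, passing -loongarch-enable-sink-fold=false) when evaluating the new default.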
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8d..5107c8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
   }
 }
 
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+
+  if (!ST->hasUAL())
+    return Options;
+
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  Options.AllowOverlappingLoads = true;
+
+  // TODO: Support for vectors.
+  if (ST->is64Bit()) {
+    Options.LoadSizes = {8, 4, 2, 1};
+    Options.AllowedTailExpansions = {3, 5, 6};
+  } else {
+    Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
+
+  return Options;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7..9b479f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ public:
   bool shouldExpandReduction(const IntrinsicInst *II) const override;
 
-  // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+  TTI::MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
 };
 
 } // end namespace llvm
 
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 7d54565..6d69af5 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -39,7 +39,7 @@ LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
     : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
                               /*HasRelocationAddend=*/true) {}
 
-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() = default;
 
 unsigned LoongArchELFObjectWriter::getRelocType(const MCFixup &Fixup,
                                                 const MCValue &Target,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index f0e2bc4..08fa51d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ public:
   LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
       : Ctx(ctx), MCII(MCII) {}
 
-  ~LoongArchMCCodeEmitter() override {}
+  ~LoongArchMCCodeEmitter() override = default;
 
   void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
                          SmallVectorImpl<MCFixup> &Fixups,
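To make the memcmp expansion options concrete: with LoadSizes = {8, 4, 2, 1} and AllowOverlappingLoads = true on LA64, an equality compare of 15 bytes can be expanded into two 8-byte loads per buffer, the second pair overlapping the first by one byte, instead of calling memcmp. The C++ below is an illustrative model of that equality case only, not the IR the expansion pass actually emits:

#include <cstdint>
#include <cstring>

// Equality-only compare of exactly 15 bytes using two overlapping 8-byte
// loads per buffer (offsets 0 and 7), the shape the options above permit.
static bool equal15(const void *p, const void *q) {
  const char *a = static_cast<const char *>(p);
  const char *b = static_cast<const char *>(q);
  uint64_t a0, b0, a1, b1;
  std::memcpy(&a0, a, 8);      // bytes 0..7
  std::memcpy(&b0, b, 8);
  std::memcpy(&a1, a + 7, 8);  // bytes 7..14
  std::memcpy(&b1, b + 7, 8);
  return ((a0 ^ b0) | (a1 ^ b1)) == 0;
}

The AllowedTailExpansions entries (3, 5, 6 on LA64; 3 on LA32) extend the same treatment to awkward tail sizes, letting a 3-byte tail, for example, be covered by a 2-byte plus a 1-byte load rather than falling back to the library call.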
