Diffstat (limited to 'llvm/lib/Target/LoongArch')
8 files changed, 350 insertions, 199 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index d5a5f17..36c3011 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file describes the baisc single-precision floating-point instructions.
+// This file describes the basic single-precision floating-point instructions.
 //
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index ac5e7f3..1493bf4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -158,7 +158,12 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
   // estimateStackSize has been observed to under-estimate the final stack
   // size, so give ourselves wiggle-room by checking for stack size
   // representable an 11-bit signed field rather than 12-bits.
-  if (!isInt<11>(MFI.estimateStackSize(MF)))
+  // For [x]vstelm.{b/h/w/d} memory instructions with 8 imm offset, 7-bit
+  // signed field is fine.
+  unsigned EstimateStackSize = MFI.estimateStackSize(MF);
+  if (!isInt<11>(EstimateStackSize) ||
+      (MF.getSubtarget<LoongArchSubtarget>().hasExtLSX() &&
+       !isInt<7>(EstimateStackSize)))
     ScavSlotsNum = std::max(ScavSlotsNum, 1u);

   // For CFR spill.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c47987f..e915a3c4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2514,8 +2514,9 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   assert(ResTy.isVector());

   unsigned NumElts = ResTy.getVectorNumElements();
-  SDValue Vector = DAG.getUNDEF(ResTy);
-  for (unsigned i = 0; i < NumElts; ++i) {
+  SDValue Vector =
+      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0));
+  for (unsigned i = 1; i < NumElts; ++i) {
     Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                          Node->getOperand(i),
                          DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
@@ -2597,12 +2598,9 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VecTy = Op->getOperand(0)->getValueType(0);
   SDValue Idx = Op->getOperand(1);
-  EVT EltTy = VecTy.getVectorElementType();
   unsigned NumElts = VecTy.getVectorNumElements();

-  if (isa<ConstantSDNode>(Idx) &&
-      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
-       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
+  if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
     return Op;

   return SDValue();
@@ -4563,6 +4561,80 @@ static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
   llvm_unreachable("Unexpected node type for vXi1 sign extension");
 }

+static SDValue
+performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
+                            TargetLowering::DAGCombinerInfo &DCI,
+                            const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
+    return SDValue();
+
+  bool UseLASX;
+  unsigned Opc = ISD::DELETED_NODE;
+  EVT CmpVT = Src.getOperand(0).getValueType();
+  EVT EltVT = CmpVT.getVectorElementType();
+
+  if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
+    UseLASX = false;
+  else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
+           CmpVT.getSizeInBits() == 256)
+    UseLASX = true;
+  else
+    return SDValue();
+
+  SDValue SrcN1 = Src.getOperand(1);
+  switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
+  default:
+    break;
+  case ISD::SETEQ:
+    // x == 0 => not (vmsknez.b x)
+    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
+      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
+    break;
+  case ISD::SETGT:
+    // x > -1 => vmskgez.b x
+    if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
+      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
+    break;
+  case ISD::SETGE:
+    // x >= 0 => vmskgez.b x
+    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
+      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
+    break;
+  case ISD::SETLT:
+    // x < 0 => vmskltz.{b,h,w,d} x
+    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
+        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
+         EltVT == MVT::i64))
+      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+    break;
+  case ISD::SETLE:
+    // x <= -1 => vmskltz.{b,h,w,d} x
+    if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
+        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
+         EltVT == MVT::i64))
+      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+    break;
+  case ISD::SETNE:
+    // x != 0 => vmsknez.b x
+    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
+      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
+    break;
+  }
+
+  if (Opc == ISD::DELETED_NODE)
+    return SDValue();
+
+  SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
+  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
+  V = DAG.getZExtOrTrunc(V, DL, T);
+  return DAG.getBitcast(VT, V);
+}
+
 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const LoongArchSubtarget &Subtarget) {
@@ -4577,110 +4649,63 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
   if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
     return SDValue();

-  unsigned Opc = ISD::DELETED_NODE;
   // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
+  SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
+  if (Res)
+    return Res;
+
+  // Generate vXi1 using [X]VMSKLTZ
+  MVT SExtVT;
+  unsigned Opc;
+  bool UseLASX = false;
+  bool PropagateSExt = false;
+
   if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
-    bool UseLASX;
     EVT CmpVT = Src.getOperand(0).getValueType();
-    EVT EltVT = CmpVT.getVectorElementType();
-
-    if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() <= 128)
-      UseLASX = false;
-    else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
-             CmpVT.getSizeInBits() <= 256)
-      UseLASX = true;
-    else
+    if (CmpVT.getSizeInBits() > 256)
       return SDValue();
-
-    SDValue SrcN1 = Src.getOperand(1);
-    switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
-    default:
-      break;
-    case ISD::SETEQ:
-      // x == 0 => not (vmsknez.b x)
-      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
-        Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
-      break;
-    case ISD::SETGT:
-      // x > -1 => vmskgez.b x
-      if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
-        Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
-      break;
-    case ISD::SETGE:
-      // x >= 0 => vmskgez.b x
-      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
-        Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
-      break;
-    case ISD::SETLT:
-      // x < 0 => vmskltz.{b,h,w,d} x
-      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
-          (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
-           EltVT == MVT::i64))
-        Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
-      break;
-    case ISD::SETLE:
-      // x <= -1 => vmskltz.{b,h,w,d} x
-      if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
-          (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
-           EltVT == MVT::i64))
-        Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
-      break;
-    case ISD::SETNE:
-      // x != 0 => vmsknez.b x
-      if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
-        Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
-      break;
-    }
   }

-  // Generate vXi1 using [X]VMSKLTZ
-  if (Opc == ISD::DELETED_NODE) {
-    MVT SExtVT;
-    bool UseLASX = false;
-    bool PropagateSExt = false;
-    switch (SrcVT.getSimpleVT().SimpleTy) {
-    default:
-      return SDValue();
-    case MVT::v2i1:
-      SExtVT = MVT::v2i64;
-      break;
-    case MVT::v4i1:
-      SExtVT = MVT::v4i32;
-      if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
-        SExtVT = MVT::v4i64;
-        UseLASX = true;
-        PropagateSExt = true;
-      }
-      break;
-    case MVT::v8i1:
-      SExtVT = MVT::v8i16;
-      if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
-        SExtVT = MVT::v8i32;
-        UseLASX = true;
-        PropagateSExt = true;
-      }
-      break;
-    case MVT::v16i1:
-      SExtVT = MVT::v16i8;
-      if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
-        SExtVT = MVT::v16i16;
-        UseLASX = true;
-        PropagateSExt = true;
-      }
-      break;
-    case MVT::v32i1:
-      SExtVT = MVT::v32i8;
+  switch (SrcVT.getSimpleVT().SimpleTy) {
+  default:
+    return SDValue();
+  case MVT::v2i1:
+    SExtVT = MVT::v2i64;
+    break;
+  case MVT::v4i1:
+    SExtVT = MVT::v4i32;
+    if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
+      SExtVT = MVT::v4i64;
       UseLASX = true;
-      break;
-    };
-    if (UseLASX && !Subtarget.has32S() && !Subtarget.hasExtLASX())
-      return SDValue();
-    Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
-                        : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
-    Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
-  } else {
-    Src = Src.getOperand(0);
-  }
+      PropagateSExt = true;
+    }
+    break;
+  case MVT::v8i1:
+    SExtVT = MVT::v8i16;
+    if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
+      SExtVT = MVT::v8i32;
+      UseLASX = true;
+      PropagateSExt = true;
+    }
+    break;
+  case MVT::v16i1:
+    SExtVT = MVT::v16i8;
+    if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
+      SExtVT = MVT::v16i16;
+      UseLASX = true;
+      PropagateSExt = true;
+    }
+    break;
+  case MVT::v32i1:
+    SExtVT = MVT::v32i8;
+    UseLASX = true;
+    break;
+  };
+  if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
+    return SDValue();
+  Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
+                      : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+  Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;

   SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src);
   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
@@ -6003,10 +6028,9 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
   Register ScratchReg1 = XSrc;
   if (Idx >= HalfSize) {
     ScratchReg1 = MRI.createVirtualRegister(RC);
-    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
-        .addReg(XSrc)
+    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_D), ScratchReg1)
         .addReg(XSrc)
-        .addImm(1);
+        .addImm(14);
   }

   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 95e9fd4..5096a8f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1282,6 +1282,32 @@ multiclass PatCCXrXrF<CondCode CC, string Inst> {
             (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
 }

+multiclass PairInsertExtractPatV8<ValueType vecty, ValueType elemty> {
+  foreach imm1 = 0...3 in {
+    foreach imm2 = 0...3 in {
+      defvar Imm = !or(!shl(imm2, 4), imm1);
+      def : Pat<(vector_insert (vector_insert vecty:$xd,
+                     (elemty (vector_extract vecty:$xj, imm1)), imm2),
+                 (elemty (vector_extract vecty:$xj, !add(imm1, 4))),
+                 !add(imm2, 4)),
+                (XVEXTRINS_W $xd, $xj, Imm)>;
+    }
+  }
+}
+
+multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
+  foreach imm1 = 0...1 in {
+    foreach imm2 = 0...1 in {
+      defvar Imm = !or(!shl(imm2, 4), imm1);
+      def : Pat<(vector_insert (vector_insert vecty:$xd,
+                     (elemty (vector_extract vecty:$xj, imm1)), imm2),
+                 (elemty (vector_extract vecty:$xj, !add(imm1, 2))),
+                 !add(imm2, 2)),
+                (XVEXTRINS_D $xd, $xj, Imm)>;
+    }
+  }
+}
+
 let Predicates = [HasExtLASX] in {

 // XVADD_{B/H/W/D}
@@ -1582,6 +1608,38 @@ defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
 defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
 defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;

+// Insert two elements extracted from vector into vector. (The positions
+// of the two elements must be same in the source or destination vector's
+// front and back 128bits.)
+// 2*XVPICKVE2GR_{W/D} + 2*XVINSGR2VR_{W/D} -> XVEXTRINS_{W/D}
+// XVPERMI_D + 2*XVPICKVE2GR_{B/H} + 2*PseudoXVINSGR2VR_{B/H} -> XVEXTRINS_{W/D}
+foreach imm1 = 0...15 in {
+  foreach imm2 = 0...15 in {
+    defvar Imm = !or(!shl(imm2, 4), imm1);
+    def : Pat<(vector_insert (vector_insert v32i8:$xd,
+                   (GRLenVT (vector_extract v32i8:$xj, imm1)), imm2),
+               (GRLenVT (vector_extract v32i8:$xj, !add(imm1, 16))),
+               !add(imm2, 16)),
+              (XVEXTRINS_B $xd, $xj, Imm)>;
+  }
+}
+
+foreach imm1 = 0...7 in {
+  foreach imm2 = 0...7 in {
+    defvar Imm = !or(!shl(imm2, 4), imm1);
+    def : Pat<(vector_insert (vector_insert v16i16:$xd,
+                   (GRLenVT (vector_extract v16i16:$xj, imm1)), imm2),
+               (GRLenVT (vector_extract v16i16:$xj, !add(imm1, 8))),
+               !add(imm2, 8)),
+              (XVEXTRINS_H $xd, $xj, Imm)>;
+  }
+}
+
+defm : PairInsertExtractPatV8<v8i32, GRLenVT>;
+defm : PairInsertExtractPatV8<v8f32, f32>;
+defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
+defm : PairInsertExtractPatV4<v4f64, f64>;
+
 // PseudoXVINSGR2VR_{B/H}
 def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
           (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
@@ -1593,11 +1651,20 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
           (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
 def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
           (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
-
-def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
-          (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
-def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
-          (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
+def : Pat<(vector_insert v8f32:$xd, (loongarch_movgr2fr_w_la64 GPR:$rj), uimm3:$imm),
+          (XVINSGR2VR_W $xd, $rj, uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$xd, (f64 (bitconvert i64:$rj)), uimm2:$imm),
+          (XVINSGR2VR_D $xd, $rj, uimm2:$imm)>;
+def : Pat<(vector_insert v8f32:$xd, (f32 (vector_extract v8f32:$xj, uimm3:$imm1)), uimm3:$imm2),
+          (XVINSGR2VR_W $xd, (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm1), uimm3:$imm2)>;
+def : Pat<(vector_insert v4f64:$xd, (f64 (vector_extract v4f64:$xj, uimm2:$imm1)), uimm2:$imm2),
+          (XVINSGR2VR_D $xd, (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm1), uimm2:$imm2)>;
+
+// XVINSVE0_{W/D}
+def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
+          (XVINSVE0_W $xd, (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$xd, FPR64:$fj, uimm2:$imm),
+          (XVINSVE0_D $xd, (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), uimm2:$imm)>;

 // scalar_to_vector
 def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
@@ -1790,7 +1857,25 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
   def : RegRegStPat<store, XVSTX, LASX256, vt>;
 }

+// Bitcast float/double element extracted from vector to integer.
+def : Pat<(loongarch_movfr2gr_s_la64 (f32 (vector_extract v8f32:$xj, uimm3:$imm))),
+          (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm)>;
+def : Pat<(i64 (bitconvert (f64 (vector_extract v4f64:$xj, uimm2:$imm)))),
+          (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm)>;
+
 // Vector extraction with constant index.
+foreach imm = 16...31 in {
+  defvar Imm = !and(imm, 15);
+  def : Pat<(i64 (vector_extract v32i8:$xj, imm)),
+            (VPICKVE2GR_B (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128),
+                          Imm)>;
+}
+foreach imm = 8...15 in {
+  defvar Imm = !and(imm, 7);
+  def : Pat<(i64 (vector_extract v16i16:$xj, imm)),
+            (VPICKVE2GR_H (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128),
+                          Imm)>;
+}
 def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
           (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
 def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d73d780..3c9defb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1482,6 +1482,28 @@ multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
             (Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
 }

+multiclass InsertExtractPatV4<ValueType vecty, ValueType elemty> {
+  foreach imm1 = 0...3 in {
+    foreach imm2 = 0...3 in {
+      defvar Imm = !or(!shl(imm2, 4), imm1);
+      def : Pat<(vector_insert vecty:$vd,
+                     (elemty (vector_extract vecty:$vj, imm1)), imm2),
+                (VEXTRINS_W $vd, $vj, Imm)>;
+    }
+  }
+}
+
+multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
+  foreach imm1 = 0...1 in {
+    foreach imm2 = 0...1 in {
+      defvar Imm = !or(!shl(imm2, 4), imm1);
+      def : Pat<(vector_insert vecty:$vd,
+                     (elemty (vector_extract vecty:$vj, imm1)), imm2),
+                (VEXTRINS_D $vd, $vj, Imm)>;
+    }
+  }
+}
+
 let Predicates = [HasExtLSX] in {

 // VADD_{B/H/W/D}
@@ -1782,6 +1804,31 @@ defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
 defm : PatCCVrVrF<SETO, "VFCMP_COR">;
 defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;

+// Insert element extracted from vector into vector.
+// VPICKVE2GR_{B/H/W/D} + VINSGR2VR_{B/H/W/D} -> VEXTRINS_{B/H/W/D}
+foreach imm1 = 0...15 in {
+  foreach imm2 = 0...15 in {
+    defvar Imm = !or(!shl(imm2, 4), imm1);
+    def : Pat<(vector_insert v16i8:$vd,
+                   (GRLenVT (vector_extract v16i8:$vj, imm1)), imm2),
+              (VEXTRINS_B $vd, $vj, Imm)>;
+  }
+}
+
+foreach imm1 = 0...7 in {
+  foreach imm2 = 0...7 in {
+    defvar Imm = !or(!shl(imm2, 4), imm1);
+    def : Pat<(vector_insert v8i16:$vd,
+                   (GRLenVT (vector_extract v8i16:$vj, imm1)), imm2),
+              (VEXTRINS_H $vd, $vj, Imm)>;
+  }
+}
+
+defm : InsertExtractPatV4<v4i32, GRLenVT>;
+defm : InsertExtractPatV4<v4f32, f32>;
+defm : InsertExtractPatV2<v2i64, GRLenVT>;
+defm : InsertExtractPatV2<v2f64, f64>;
+
 // VINSGR2VR_{B/H/W/D}
 def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
           (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
@@ -1791,11 +1838,23 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
           (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
 def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
           (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
+def : Pat<(vector_insert v4f32:$vd, (loongarch_movgr2fr_w_la64 GPR:$rj), uimm2:$imm),
+          (VINSGR2VR_W $vd, $rj, uimm2:$imm)>;
+def : Pat<(vector_insert v2f64:$vd, (f64 (bitconvert i64:$rj)), uimm1:$imm),
+          (VINSGR2VR_D $vd, $rj, uimm1:$imm)>;
+
+// VEXTRINS_{W/D}
+foreach imm = 0...3 in {
+  defvar Imm = !shl(imm, 4);
+  def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, imm),
+            (VEXTRINS_W $vd, (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), Imm)>;
+}
-def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
-          (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
-def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
-          (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
+foreach imm = 0...1 in {
+  defvar Imm = !shl(imm, 4);
+  def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, imm),
+            (VEXTRINS_D $vd, (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), Imm)>;
+}

 // scalar_to_vector
 def : Pat<(v4f32 (scalar_to_vector FPR32:$fj)),
@@ -1990,6 +2049,12 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
   def : RegRegStPat<store, VSTX, LSX128, vt>;
 }

+// Bitcast float/double element extracted from vector to integer.
+def : Pat<(loongarch_movfr2gr_s_la64 (f32 (vector_extract v4f32:$vj, uimm2:$imm))),
+          (VPICKVE2GR_W v4f32:$vj, uimm2:$imm)>;
+def : Pat<(i64 (bitconvert (f64 (vector_extract v2f64:$vj, uimm1:$imm)))),
+          (VPICKVE2GR_D v2f64:$vj, uimm1:$imm)>;
+
 // Vector extraction with constant index.
 def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)),
           (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 7b9f115..8fa72bc 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -177,74 +177,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
   }
 }

-// Linker relaxation may change code size. We have to insert Nops
-// for .align directive when linker relaxation enabled. So then Linker
-// could satisfy alignment by removing Nops.
-// The function returns the total Nops Size we need to insert.
-bool LoongArchAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
-    const MCAlignFragment &AF, unsigned &Size) {
-  // Calculate Nops Size only when linker relaxation enabled.
-  if (!AF.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax))
-    return false;
-
-  // Ignore alignment if MaxBytesToEmit is less than the minimum Nop size.
-  const unsigned MinNopLen = 4;
-  if (AF.getMaxBytesToEmit() < MinNopLen)
-    return false;
-  Size = AF.getAlignment().value() - MinNopLen;
-  return AF.getAlignment() > MinNopLen;
-}
-
-// We need to insert R_LARCH_ALIGN relocation type to indicate the
-// position of Nops and the total bytes of the Nops have been inserted
-// when linker relaxation enabled.
-// The function inserts fixup_loongarch_align fixup which eventually will
-// transfer to R_LARCH_ALIGN relocation type.
-// The improved R_LARCH_ALIGN requires symbol index. The lowest 8 bits of
-// addend represent alignment and the other bits of addend represent the
-// maximum number of bytes to emit. The maximum number of bytes is zero
-// means ignore the emit limit.
-bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
-                                                        MCAlignFragment &AF) {
-  // Insert the fixup only when linker relaxation enabled.
-  if (!AF.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax))
-    return false;
-
-  // Calculate total Nops we need to insert. If there are none to insert
-  // then simply return.
-  unsigned InsertedNopBytes;
-  if (!shouldInsertExtraNopBytesForCodeAlign(AF, InsertedNopBytes))
-    return false;
-
-  MCSection *Sec = AF.getParent();
-  MCContext &Ctx = getContext();
-  const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
-  MCFixup Fixup = MCFixup::create(0, Dummy, ELF::R_LARCH_ALIGN);
-  unsigned MaxBytesToEmit = AF.getMaxBytesToEmit();
-
-  auto createExtendedValue = [&]() {
-    const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec];
-    if (MCSym == nullptr) {
-      // Define a marker symbol at the section with an offset of 0.
-      MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align");
-      Sym->setFragment(&*Sec->getBeginSymbol()->getFragment());
-      Asm.registerSymbol(*Sym);
-      MCSym = MCSymbolRefExpr::create(Sym, Ctx);
-      getSecToAlignSym()[Sec] = MCSym;
-    }
-    return MCValue::get(&MCSym->getSymbol(), nullptr,
-                        MaxBytesToEmit << 8 | Log2(AF.getAlignment()));
-  };
-
-  uint64_t FixedValue = 0;
-  MCValue Value = MaxBytesToEmit >= InsertedNopBytes
-                      ? MCValue::get(InsertedNopBytes)
-                      : createExtendedValue();
-  Asm.getWriter().recordRelocation(AF, Fixup, Value, FixedValue);
-
-  return true;
-}
-
 bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
                                                 const MCValue &Target) {
   switch (Fixup.getKind()) {
@@ -279,6 +211,53 @@ getRelocPairForSize(unsigned Size) {
   }
 }

+// Check if an R_LARCH_ALIGN relocation is needed for an alignment directive.
+// If conditions are met, compute the padding size and create a fixup encoding
+// the padding size in the addend. If MaxBytesToEmit is smaller than the padding
+// size, the fixup encodes MaxBytesToEmit in the higher bits and references a
+// per-section marker symbol.
+bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
+  // Use default handling unless linker relaxation is enabled and the
+  // MaxBytesToEmit >= the nop size.
+  if (!F.getSubtargetInfo()->hasFeature(LoongArch::FeatureRelax))
+    return false;
+  const unsigned MinNopLen = 4;
+  unsigned MaxBytesToEmit = F.getAlignMaxBytesToEmit();
+  if (MaxBytesToEmit < MinNopLen)
+    return false;
+
+  Size = F.getAlignment().value() - MinNopLen;
+  if (F.getAlignment() <= MinNopLen)
+    return false;
+
+  MCContext &Ctx = getContext();
+  const MCExpr *Expr = nullptr;
+  if (MaxBytesToEmit >= Size) {
+    Expr = MCConstantExpr::create(Size, getContext());
+  } else {
+    MCSection *Sec = F.getParent();
+    const MCSymbolRefExpr *SymRef = getSecToAlignSym()[Sec];
+    if (SymRef == nullptr) {
+      // Define a marker symbol at the section with an offset of 0.
+      MCSymbol *Sym = Ctx.createNamedTempSymbol("la-relax-align");
+      Sym->setFragment(&*Sec->getBeginSymbol()->getFragment());
+      Asm->registerSymbol(*Sym);
+      SymRef = MCSymbolRefExpr::create(Sym, Ctx);
+      getSecToAlignSym()[Sec] = SymRef;
+    }
+    Expr = MCBinaryExpr::createAdd(
+        SymRef,
+        MCConstantExpr::create((MaxBytesToEmit << 8) | Log2(F.getAlignment()),
+                               Ctx),
+        Ctx);
+  }
+  MCFixup Fixup =
+      MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
+  F.setVarFixups({Fixup});
+  F.getParent()->setLinkerRelaxable();
+  return true;
+}
+
 std::pair<bool, bool> LoongArchAsmBackend::relaxLEB128(MCFragment &F,
                                                        int64_t &Value) const {
   const MCExpr &Expr = F.getLEBValue();
@@ -434,7 +413,7 @@ bool LoongArchAsmBackend::isPCRelFixupResolved(const MCSymbol *SymA,

   // Otherwise, check if the offset between the symbol and fragment is fully
   // resolved, unaffected by linker-relaxable fragments (e.g. instructions or
-  // offset-affected MCAlignFragment). Complements the generic
+  // offset-affected FT_Align fragments). Complements the generic
   // isSymbolRefDifferenceFullyResolvedImpl.
   if (!PCRelTemp)
     PCRelTemp = getContext().createTempSymbol();
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index b32ba06..3d929fc 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -45,20 +45,13 @@ public:
                   MutableArrayRef<char> Data, uint64_t Value,
                   bool IsResolved) override;

-  // Return Size with extra Nop Bytes for alignment directive in code section.
-  bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
-                                             unsigned &Size) override;
-
-  // Insert target specific fixup type for alignment directive in code section.
-  bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
-                                     MCAlignFragment &AF) override;
-
   bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target);

   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
   MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;

+  bool relaxAlign(MCFragment &F, unsigned &Size) override;
   bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override;
   bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override;
   std::pair<bool, bool> relaxLEB128(MCFragment &F,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
index 03ce004..7cefb3f 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
@@ -52,6 +52,9 @@ static ABI getTripleABI(const Triple &TT) {
   bool Is64Bit = TT.isArch64Bit();
   ABI TripleABI;
   switch (TT.getEnvironment()) {
+  case llvm::Triple::EnvironmentType::UnknownEnvironment:
+    TripleABI = ABI_Unknown;
+    break;
   case llvm::Triple::EnvironmentType::GNUSF:
   case llvm::Triple::EnvironmentType::MuslSF:
     TripleABI = Is64Bit ? ABI_LP64S : ABI_ILP32S;
@@ -96,7 +99,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,

   // 1. If the '-target-abi' is valid, use it.
   if (IsABIValidForFeature(ArgProvidedABI)) {
-    if (TT.hasEnvironment() && ArgProvidedABI != TripleABI)
+    if (IsABIValidForFeature(TripleABI) && ArgProvidedABI != TripleABI)
       errs() << "warning: triple-implied ABI conflicts with provided target-abi '"
              << ABIName << "', using target-abi\n";
@@ -164,10 +167,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
       return Is64Bit ? ABI_LP64F : ABI_ILP32F;
     return Is64Bit ? ABI_LP64S : ABI_ILP32S;
   };
-  if (ABIName.empty())
-    errs() << "warning: the triple-implied ABI is invalid, ignoring and using "
-              "feature-implied ABI\n";
-  else
+  if (!ABIName.empty())
    errs() << "warning: both target-abi and the triple-implied ABI are "
              "invalid, ignoring and using feature-implied ABI\n";
   return checkABIStandardized(GetFeatureABI());
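
A minimal standalone C++ sketch of the R_LARCH_ALIGN addend layout that the new LoongArchAsmBackend::relaxAlign() above emits; the helper name alignAddend is hypothetical and this is not part of the patch or of the LLVM API. When the emit limit covers the whole padding, the addend is simply the padding size (alignment minus one 4-byte nop); otherwise the extended form packs log2(alignment) into the low 8 bits and the emit limit into the higher bits, matching (MaxBytesToEmit << 8) | Log2(F.getAlignment()) in the diff.

#include <cassert>
#include <cstdint>

constexpr uint64_t MinNopLen = 4; // one LoongArch nop is 4 bytes

// Hypothetical helper mirroring relaxAlign(): plain form when the emit limit
// covers the whole padding, extended form ((limit << 8) | log2(align)) otherwise.
uint64_t alignAddend(uint64_t Alignment, uint64_t MaxBytesToEmit) {
  uint64_t Padding = Alignment - MinNopLen; // nop bytes the assembler inserts
  if (MaxBytesToEmit >= Padding)
    return Padding;
  uint64_t Log2Align = 0;
  for (uint64_t A = Alignment; A > 1; A >>= 1)
    ++Log2Align;
  return (MaxBytesToEmit << 8) | Log2Align;
}

int main() {
  assert(alignAddend(16, 32) == 12);             // plain form: 16 - 4 padding bytes
  assert(alignAddend(16, 8) == ((8u << 8) | 4)); // extended form: limit 8, 2^4 alignment
  return 0;
}

The extended form corresponds to the case where relaxAlign() additionally references the per-section "la-relax-align" marker symbol, as in the comment retained from the removed shouldInsertFixupForCodeAlign(): the low 8 bits of the addend carry the alignment and the remaining bits the maximum number of bytes to emit.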