diff options
Diffstat (limited to 'llvm/lib/Target')
37 files changed, 315 insertions, 356 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 70d5ad7d..dc8e7c8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32)); + DAG.getTargetConstant(Cnt, DL, MVT::i32)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL, MVT::i32), @@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), - DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags()); + DAG.getTargetConstant(Cnt, DL, MVT::i32), + Op->getFlags()); } // Right shift register. Note, there is not a shift right register @@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy, DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), - Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); + Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32)); // We can handle smaller integers by generating an extra trunc. if (IntBits < FloatBits) FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); @@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100); SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100); SDValue NewShiftConstant = - DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32); + DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32); return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant); } @@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { Op = DAG.getNode(Opcode, DL, VT, Op, - DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32)); + DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true)); if (N->getValueType(0) == MVT::i64) Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, DAG.getConstant(0, DL, MVT::i64)); return Op; } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) { Op = DAG.getNode(Opcode, DL, VT, Op, - DAG.getConstant(ShiftAmount, DL, MVT::i32)); + DAG.getTargetConstant(ShiftAmount, DL, MVT::i32)); if (N->getValueType(0) == MVT::i64) Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, DAG.getConstant(0, DL, MVT::i64)); @@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) { Op.getOperand(ExtOffset == 0 ? 0 : 1)); if (Shift != 0) BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC, - DAG.getConstant(Shift, DL, MVT::i32)); + DAG.getTargetConstant(Shift, DL, MVT::i32)); return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT)); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6ef0a95..09ce713 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>; def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>; def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>; -def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); }]> { let EncoderMethod = "getVecShiftR8OpValue"; let DecoderMethod = "DecodeVecShiftR8Imm"; let ParserMatchClass = Imm1_8Operand; } -def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); }]> { let EncoderMethod = "getVecShiftR16OpValue"; let DecoderMethod = "DecodeVecShiftR16Imm"; let ParserMatchClass = Imm1_16Operand; } -def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); }]> { let EncoderMethod = "getVecShiftR16OpValue"; let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; let ParserMatchClass = Imm1_8Operand; } -def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); }]> { let EncoderMethod = "getVecShiftR32OpValue"; let DecoderMethod = "DecodeVecShiftR32Imm"; let ParserMatchClass = Imm1_32Operand; } -def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); }]> { let EncoderMethod = "getVecShiftR32OpValue"; let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; let ParserMatchClass = Imm1_16Operand; } -def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); }]> { let EncoderMethod = "getVecShiftR64OpValue"; let DecoderMethod = "DecodeVecShiftR64Imm"; let ParserMatchClass = Imm1_64Operand; } -def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); }]> { let EncoderMethod = "getVecShiftR64OpValue"; @@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm1_32Operand; } -// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} - def Imm0_0Operand : AsmImmRange<0, 0>; def Imm0_1Operand : AsmImmRange<0, 1>; def Imm1_1Operand : AsmImmRange<1, 1>; @@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_31Operand : AsmImmRange<0, 31>; def Imm0_63Operand : AsmImmRange<0, 63>; -def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 8); }]> { let EncoderMethod = "getVecShiftL8OpValue"; let DecoderMethod = "DecodeVecShiftL8Imm"; let ParserMatchClass = Imm0_7Operand; } -def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 16); }]> { let EncoderMethod = "getVecShiftL16OpValue"; let DecoderMethod = "DecodeVecShiftL16Imm"; let ParserMatchClass = Imm0_15Operand; } -def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 32); }]> { let EncoderMethod = "getVecShiftL32OpValue"; let DecoderMethod = "DecodeVecShiftL32Imm"; let ParserMatchClass = Imm0_31Operand; } -def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ +def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ return (((uint32_t)Imm) < 64); }]> { let EncoderMethod = "getVecShiftL64OpValue"; @@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_63Operand; } -// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant -// (ImmLeaf) -def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 8); -}]> { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 16); -}]> { - let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 32); -}]> { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ - return (((uint32_t)Imm) < 64); -}]> { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} // Crazy immediate formats used by 32-bit and 64-bit logical immediate // instructions for splatting repeating bit patterns across the immediate. @@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, V128, V128, vecshiftR64, asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> { bits<6> imm; let Inst{21-16} = imm; } @@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> { bits<4> imm; let Inst{19-16} = imm; } @@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> { bits<5> imm; let Inst{20-16} = imm; } @@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, V128, V128, vecshiftR64, asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> { bits<6> imm; let Inst{21-16} = imm; } diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 36c9cb6..bc6b931 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1010,6 +1010,36 @@ let Predicates = [HasSVE_or_SME] in { defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; + + // mul x (splat -1) -> neg x + def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))), + (NEG_ZPmZ_B $Op2, $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))), + (NEG_ZPmZ_H $Op2, $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))), + (NEG_ZPmZ_S $Op2, $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))), + (NEG_ZPmZ_D $Op2, $Op1, $Op2)>; + + let AddedComplexity = 5 in { + def : Pat<(nxv16i8 (AArch64mul_p nxv16i1:$Op1, nxv16i8:$Op2, (nxv16i8 (splat_vector (i32 -1))))), + (NEG_ZPmZ_B_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_p nxv8i1:$Op1, nxv8i16:$Op2, (nxv8i16 (splat_vector (i32 -1))))), + (NEG_ZPmZ_H_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_p nxv4i1:$Op1, nxv4i32:$Op2, (nxv4i32 (splat_vector (i32 -1))))), + (NEG_ZPmZ_S_UNDEF $Op2, $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_p nxv2i1:$Op1, nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 -1))))), + (NEG_ZPmZ_D_UNDEF $Op2, $Op1, $Op2)>; + } + + def : Pat<(nxv16i8 (AArch64mul_m1 nxv16i1:$Op1, (nxv16i8 (splat_vector (i32 -1))), nxv16i8:$Op2)), + (NEG_ZPmZ_B (DUP_ZI_B -1, 0), $Op1, $Op2)>; + def : Pat<(nxv8i16 (AArch64mul_m1 nxv8i1:$Op1, (nxv8i16 (splat_vector (i32 -1))), nxv8i16:$Op2)), + (NEG_ZPmZ_H (DUP_ZI_H -1, 0), $Op1, $Op2)>; + def : Pat<(nxv4i32 (AArch64mul_m1 nxv4i1:$Op1, (nxv4i32 (splat_vector (i32 -1))), nxv4i32:$Op2)), + (NEG_ZPmZ_S (DUP_ZI_S -1, 0), $Op1, $Op2)>; + def : Pat<(nxv2i64 (AArch64mul_m1 nxv2i1:$Op1, (nxv2i64 (splat_vector (i64 -1))), nxv2i64:$Op2)), + (NEG_ZPmZ_D (DUP_ZI_D -1, 0), $Op1, $Op2)>; } // End HasSVE_or_SME // COMPACT - word and doubleword diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 96cc3f3..3e55b76 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2957,9 +2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); // Need special instructions for atomics that affect ordering. - if (Order != AtomicOrdering::NotAtomic && - Order != AtomicOrdering::Unordered && - Order != AtomicOrdering::Monotonic) { + if (isStrongerThanMonotonic(Order)) { assert(!isa<GZExtLoad>(LdSt)); assert(MemSizeInBytes <= 8 && "128-bit atomics should already be custom-legalized"); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 6025f1c..63313da 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned NewOpc = Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; MachineIRBuilder MIB(MI); - auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); - MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); + MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm); MI.eraseFromParent(); } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 539470d..be44b8f 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> { //===----------------------------------------------------------------------===// // SME2 multi-vec saturating shift right narrow class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<4> imm4; @@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> { def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>; - def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; + def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, @@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, } multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> { - def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, + def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32, mnemonic>{ bits<5> imm; let Inst{20-16} = imm; } - def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, + def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64, mnemonic> { bits<6> imm; let Inst{22} = imm{5}; let Inst{20-16} = imm{4-0}; } - def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>; - def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>; + def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>; + def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9a23c35..3cdd505 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm, ZPR64, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, @@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, @@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>; def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>; @@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc, multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; + vecshiftR8>; def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { + vecshiftR16> { let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; } class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, @@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, - tvecshiftR8>; + vecshiftR8>; def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, - tvecshiftR16> { + vecshiftR16> { let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; } class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm, @@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> { let Inst{20-19} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps, let Inst{9-8} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>; } // As above but shift amount takes the form of a "vector immediate". @@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm, } multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> { - def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>; - def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>; - def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>; - def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>; + def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>; + def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>; + def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>; + def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>; } multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, @@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, let Inst{9-8} = imm{4-3}; } - def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; - def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>; } // As above but shift amount takes the form of a "vector immediate". @@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>; def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>; - def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>; } class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, @@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte // SVE2 multi-vec shift narrow class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> - : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4), mnemonic, "\t$Zd, $Zn, $imm4", "", []>, Sched<[]> { bits<5> Zd; @@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz> multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> { def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>; - def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; + def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 7003a40..9446144 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2126,6 +2126,8 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureLdsBarrierArriveAtomic, FeatureSetPrioIncWgInst, Feature45BitNumRecordsBufferResource, + FeatureSupportsXNACK, + FeatureXNACK, ]>; def FeatureISAVersion12_51 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2ba3156..cb49936 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) { return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; } -/// Returns true if the function requires the implicit argument be passed -/// regardless of the function contents. -static bool funcRequiresHostcallPtr(const Function &F) { - // Sanitizers require the hostcall buffer passed in the implicit arguments. +/// Returns true if sanitizer attributes are present on a function. +static bool hasSanitizerAttributes(const Function &F) { return F.hasFnAttribute(Attribute::SanitizeAddress) || F.hasFnAttribute(Attribute::SanitizeThread) || F.hasFnAttribute(Attribute::SanitizeMemory) || @@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // If the function requires the implicit arg pointer due to sanitizers, // assume it's needed even if explicitly marked as not requiring it. - const bool NeedsHostcall = funcRequiresHostcallPtr(*F); - if (NeedsHostcall) { + // Flat scratch initialization is needed because `asan_malloc_impl` + // calls introduced later in pipeline will have flat scratch accesses. + // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs + // implementation for `asan_malloc_impl` is updated. + const bool HasSanitizerAttrs = hasSanitizerAttributes(*F); + if (HasSanitizerAttrs) { removeAssumedBits(IMPLICIT_ARG_PTR); removeAssumedBits(HOSTCALL_PTR); + removeAssumedBits(FLAT_SCRATCH_INIT); } for (auto Attr : ImplicitAttrs) { - if (NeedsHostcall && - (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) + if (HasSanitizerAttrs && + (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR || + Attr.first == FLAT_SCRATCH_INIT)) continue; if (F->hasFnAttribute(Attr.second)) @@ -1220,12 +1224,9 @@ static bool inlineAsmUsesAGPRs(const InlineAsm *IA) { } // TODO: Migrate to range merge of amdgpu-agpr-alloc. -// FIXME: Why is this using Attribute::NoUnwind? -struct AAAMDGPUNoAGPR - : public IRAttribute<Attribute::NoUnwind, - StateWrapper<BooleanState, AbstractAttribute>, - AAAMDGPUNoAGPR> { - AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} +struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { + using Base = StateWrapper<BooleanState, AbstractAttribute>; + AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {} static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP, Attributor &A) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 2d5ae29..2120bf8 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2303,7 +2303,10 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; if (!hasArchitectedFlatScratch()) KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK); + assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK)); + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask + << '\n'; KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY); diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index fed3778..90c828b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -722,7 +722,8 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const { return false; } - if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) { + if (New->getReg().isVirtual() && + !MRI->constrainRegClass(New->getReg(), ConstrainRC)) { LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI) << TRI->getRegClassName(ConstrainRC) << '\n'); return false; @@ -931,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII, for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg); SubDef && TII.isFoldableCopy(*SubDef); SubDef = MRI.getVRegDef(Sub->getReg())) { - MachineOperand &SrcOp = SubDef->getOperand(1); + unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef); + MachineOperand &SrcOp = SubDef->getOperand(SrcIdx); + if (SrcOp.isImm()) return &SrcOp; if (!SrcOp.isReg() || SrcOp.getReg().isPhysical()) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index e4b3528..0189e7b 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -306,7 +306,8 @@ class PrologEpilogSGPRSpillBuilder { buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR, FI, FrameReg, DwordOff); - MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass); + assert(SubReg.isPhysical()); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) .addReg(TmpVGPR, RegState::Kill); DwordOff += 4; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f7265c5..e233457 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const { return Flags; } -bool SITargetLowering::checkForPhysRegDependency( - SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const { - if (User->getOpcode() != ISD::CopyToReg) - return false; - if (!Def->isMachineOpcode()) - return false; - MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def); - if (!MDef) - return false; - - unsigned ResNo = User->getOperand(Op).getResNo(); - if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1) - return false; - const MCInstrDesc &II = TII->get(MDef->getMachineOpcode()); - if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) { - PhysReg = AMDGPU::SCC; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo)); - Cost = RC->getCopyCost(); - return true; - } - return false; -} - void SITargetLowering::emitExpandAtomicAddrSpacePredicate( Instruction *AI) const { // Given: atomicrmw fadd ptr %addr, float %val ordering diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index a474dab..74e58f4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -561,11 +561,6 @@ public: bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const; - bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII, - MCRegister &PhysReg, int &Cost) const override; - bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 56435a5..46757cf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2112,8 +2112,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::SI_RESTORE_S32_FROM_VGPR: MI.setDesc(get(AMDGPU::V_READLANE_B32)); - MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(), - &AMDGPU::SReg_32_XM0RegClass); break; case AMDGPU::AV_MOV_B32_IMM_PSEUDO: { Register Dst = MI.getOperand(0).getReg(); @@ -3435,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { } } +unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::V_MOV_B16_t16_e32: + case AMDGPU::V_MOV_B16_t16_e64: + return 2; + case AMDGPU::V_MOV_B32_e32: + case AMDGPU::V_MOV_B32_e64: + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B64_e32: + case AMDGPU::V_MOV_B64_e64: + case AMDGPU::S_MOV_B32: + case AMDGPU::S_MOV_B64: + case AMDGPU::S_MOV_B64_IMM_PSEUDO: + case AMDGPU::COPY: + case AMDGPU::WWM_COPY: + case AMDGPU::V_ACCVGPR_WRITE_B32_e64: + case AMDGPU::V_ACCVGPR_READ_B32_e64: + case AMDGPU::V_ACCVGPR_MOV_B32: + case AMDGPU::AV_MOV_B32_IMM_PSEUDO: + case AMDGPU::AV_MOV_B64_IMM_PSEUDO: + return 1; + default: + llvm_unreachable("MI is not a foldable copy"); + } +} + static constexpr AMDGPU::OpName ModifierOpNames[] = { AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp, @@ -8117,21 +8141,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, // hope for the best. if (Inst.isCopy() && DstReg.isPhysical() && RI.isVGPR(MRI, Inst.getOperand(1).getReg())) { - // TODO: Only works for 32 bit registers. - if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) { - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), - get(AMDGPU::V_READFIRSTLANE_B32), DstReg) - .add(Inst.getOperand(1)); - } else { - Register NewDst = - MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), - get(AMDGPU::V_READFIRSTLANE_B32), NewDst) - .add(Inst.getOperand(1)); - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), - DstReg) - .addReg(NewDst); - } + Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), + get(AMDGPU::V_READFIRSTLANE_B32), NewDst) + .add(Inst.getOperand(1)); + BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), + DstReg) + .addReg(NewDst); + Inst.eraseFromParent(); return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a21089f..cc59acf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -417,6 +417,7 @@ public: const MachineInstr &MIb) const override; static bool isFoldableCopy(const MachineInstr &MI); + static unsigned getFoldableCopySrcIdx(const MachineInstr &MI); void removeModOperands(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 205237f..3c2dd42 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2222,8 +2222,6 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, // Don't need to write VGPR out. } - MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); - // Restore clobbered registers in the specified restore block. MI = RestoreMBB.end(); SB.setMI(&RestoreMBB, MI); @@ -2238,7 +2236,8 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, SB.NumSubRegs == 1 ? SB.SuperReg : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); - MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass); + + assert(SubReg.isPhysical()); bool LastSubReg = (i + 1 == e); auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) @@ -3059,8 +3058,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (IsSALU && LiveSCC) { Register NewDest; if (IsCopy) { - MF->getRegInfo().constrainRegClass(ResultReg, - &AMDGPU::SReg_32_XM0RegClass); + assert(ResultReg.isPhysical()); NewDest = ResultReg; } else { NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, @@ -3190,8 +3188,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Register NewDest; if (IsCopy) { - MF->getRegInfo().constrainRegClass(ResultReg, - &AMDGPU::SReg_32_XM0RegClass); NewDest = ResultReg; } else { NewDest = RS->scavengeRegisterBackwards( diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5630580..82fc240 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -367,19 +367,6 @@ def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> { let BaseClassOrder = 10000; } -def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> { - let CopyCost = 1; - let isAllocatable = 0; - let HasSGPR = 1; -} - -def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> { - let CopyCost = 1; - let Size = 16; - let isAllocatable = 0; - let HasSGPR = 1; -} - // TODO: Do we need to set DwarfRegAlias on register tuples? def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, @@ -774,12 +761,6 @@ def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", Reg128Types.types, 32, let BaseClassOrder = 10000; } -def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32, - (add LDS_DIRECT)> { - let isAllocatable = 0; - let CopyCost = -1; -} - let GeneratePressureSet = 0, HasSGPR = 1 in { // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. @@ -797,7 +778,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16, SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16, - EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16, + EXEC_LO_LO16, EXEC_HI_LO16, M0_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16, SRC_FLAT_SCRATCH_BASE_HI_LO16)> { let Size = 16; let isAllocatable = 0; @@ -805,7 +786,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, } def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, - (add SReg_32_XM0_XEXEC, M0_CLASS)> { + (add SReg_32_XM0_XEXEC, M0)> { let AllocationPriority = 0; } @@ -830,7 +811,7 @@ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32, - (add SReg_32_XM0, M0_CLASS)> { + (add SReg_32_XM0, M0)> { let AllocationPriority = 0; let HasSGPR = 1; let BaseClassOrder = 32; @@ -842,7 +823,7 @@ def SGPR_NULL256 : SIReg<"null">; let GeneratePressureSet = 0 in { def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, - (add SReg_32, LDS_DIRECT_CLASS)> { + (add SReg_32, LDS_DIRECT)> { let isAllocatable = 0; let HasSGPR = 1; let Size = 32; @@ -981,7 +962,7 @@ defm "" : SRegClass<32, Reg1024Types.types, SGPR_1024Regs>; } def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, - (add VGPR_32, LDS_DIRECT_CLASS)> { + (add VGPR_32, LDS_DIRECT)> { let isAllocatable = 0; let HasVGPR = 1; let Size = 32; @@ -1096,21 +1077,21 @@ def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)> { } def VS_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, - (add VGPR_16, SReg_32, LDS_DIRECT_CLASS)> { + (add VGPR_16, SReg_32, LDS_DIRECT)> { let isAllocatable = 0; let HasVGPR = 1; let Size = 16; } def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, - (add VGPR_16_Lo128, SReg_32, LDS_DIRECT_CLASS)> { + (add VGPR_16_Lo128, SReg_32, LDS_DIRECT)> { let isAllocatable = 0; let HasVGPR = 1; let Size = 16; } def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, - (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { + (add VGPR_32, SReg_32, LDS_DIRECT)> { let isAllocatable = 0; let HasVGPR = 1; let HasSGPR = 1; @@ -1118,7 +1099,7 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v } def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, - (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> { + (add VGPR_32_Lo128, SReg_32, LDS_DIRECT)> { let isAllocatable = 0; let HasVGPR = 1; let HasSGPR = 1; @@ -1126,7 +1107,7 @@ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2 } def VS_32_Lo256 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32, - (add VGPR_32_Lo256, SReg_32, LDS_DIRECT_CLASS)> { + (add VGPR_32_Lo256, SReg_32, LDS_DIRECT)> { let isAllocatable = 0; let HasVGPR = 1; let HasSGPR = 1; diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td index 54e27a6..f4326de 100644 --- a/llvm/lib/Target/ARM/ARMInstrCDE.td +++ b/llvm/lib/Target/ARM/ARMInstrCDE.td @@ -268,6 +268,7 @@ class CDE_Vec_Instr<bit acc, dag oops, dag iops, string asm, string cstr, !con(iops, (ins vpred:$vp)), asm, !strconcat(cstr, vpred.vpred_constraint)>, CDE_RequiresQReg { + bits<0> vp; } diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 9dffd94..e244134 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -409,6 +409,7 @@ class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname, !strconcat(iname, "${vp}", !if(!eq(suffix, ""), "", !strconcat(".", suffix))), ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> { + bits<0> vp; let Inst{31-29} = 0b111; let Inst{27-26} = 0b11; } diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index d358913..e67db8e 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -639,6 +639,43 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeVpredNOperand(MCInst &Inst, + const MCDisassembler *Decoder) { + const auto *D = static_cast<const ARMDisassembler *>(Decoder); + unsigned VCC = D->VPTBlock.getVPTPred(); + MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0; + + Inst.addOperand(MCOperand::createImm(VCC)); // $cond + Inst.addOperand(MCOperand::createReg(CondReg)); // $cond_reg + Inst.addOperand(MCOperand::createReg(ARM::NoRegister)); // $tp_reg + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeVpredROperand(MCInst &Inst, + const MCDisassembler *Decoder) { + const auto *D = static_cast<const ARMDisassembler *>(Decoder); + unsigned VCC = D->VPTBlock.getVPTPred(); + MCRegister CondReg = VCC == ARMVCC::None ? ARM::NoRegister : ARM::P0; + + Inst.addOperand(MCOperand::createImm(VCC)); // $cond + Inst.addOperand(MCOperand::createReg(CondReg)); // $cond_reg + Inst.addOperand(MCOperand::createReg(ARM::NoRegister)); // $tp_reg + + // The last sub-operand ($inactive) is tied to an output operand. + // The output operand has already been decoded, so just copy it. + const MCInstrDesc &MCID = D->MCII->get(Inst.getOpcode()); + unsigned InactiveOpIdx = Inst.getNumOperands(); + int TiedOpIdx = MCID.getOperandConstraint(InactiveOpIdx, MCOI::TIED_TO); + assert(TiedOpIdx >= 0 && + "Inactive register in vpred_r is not tied to an output!"); + + // Make a copy of the operand to ensure it is not invalidated when MI grows. + Inst.addOperand(MCOperand(Inst.getOperand(TiedOpIdx))); // $inactive + + return MCDisassembler::Success; +} + static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const MCDisassembler *Decoder) { @@ -2777,6 +2814,7 @@ static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, Inst.addOperand(MCOperand::createImm(imm)); + Check(S, DecodeVpredROperand(Inst, Decoder)); return S; } @@ -2802,6 +2840,7 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, if (!fieldFromInstruction(Insn, 12, 1)) // I bit clear => need input FPSCR Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV)); + Check(S, DecodeVpredROperand(Inst, Decoder)); return S; } @@ -5466,30 +5505,6 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const MCDisassembler *Decoder) { - // The vpred_r operand type includes an MQPR register field derived - // from the encoding. But we don't actually want to add an operand - // to the MCInst at this stage, because AddThumbPredicate will do it - // later, and will infer the register number from the TIED_TO - // constraint. So this is a deliberately empty decoder method that - // will inhibit the auto-generated disassembly code from adding an - // operand at all. - return MCDisassembler::Success; -} - -[[maybe_unused]] static DecodeStatus -DecodeVpredNOperand(MCInst &Inst, unsigned RegNo, uint64_t Address, - const MCDisassembler *Decoder) { - // Similar to above, we want to ensure that no operands are added for the - // vpred operands. (This is marked "maybe_unused" for the moment; because - // DecoderEmitter currently (wrongly) omits operands with no instruction bits, - // the decoder doesn't actually call it yet. That will be addressed in a - // future change.) - return MCDisassembler::Success; -} - static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const MCDisassembler *Decoder) { @@ -5668,6 +5683,7 @@ DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address, if (!Check(S, AddrDecoder(Inst, addr, Address, Decoder))) return MCDisassembler::Fail; + Check(S, DecodeVpredNOperand(Inst, Decoder)); return S; } @@ -5871,7 +5887,7 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; if (!Check(S, DecodeVCVTImmOperand(Inst, imm6, Address, Decoder))) return MCDisassembler::Fail; - + Check(S, DecodeVpredROperand(Inst, Decoder)); return S; } @@ -5906,6 +5922,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, if (!Check(S, predicate_decoder(Inst, fc, Address, Decoder))) return MCDisassembler::Fail; + Check(S, DecodeVpredNOperand(Inst, Decoder)); return S; } @@ -5916,6 +5933,7 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, unsigned Rn = fieldFromInstruction(Insn, 16, 4); if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; + Check(S, DecodeVpredNOperand(Inst, Decoder)); return S; } @@ -5925,6 +5943,7 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; Inst.addOperand(MCOperand::createReg(ARM::VPR)); Inst.addOperand(MCOperand::createReg(ARM::VPR)); + Check(S, DecodeVpredNOperand(Inst, Decoder)); return S; } @@ -6199,15 +6218,13 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const { (isVectorPredicable(MI) && ITBlock.instrInITBlock())) S = SoftFail; - // If we're in an IT/VPT block, base the predicate on that. Otherwise, + // If we're in an IT block, base the predicate on that. Otherwise, // assume a predicate of AL. unsigned CC = ARMCC::AL; - unsigned VCC = ARMVCC::None; if (ITBlock.instrInITBlock()) { CC = ITBlock.getITCC(); ITBlock.advanceITState(); } else if (VPTBlock.instrInVPTBlock()) { - VCC = VPTBlock.getVPTPred(); VPTBlock.advanceVPTState(); } @@ -6230,34 +6247,6 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const { Check(S, SoftFail); } - MCInst::iterator VCCI = MI.begin(); - unsigned VCCPos; - for (VCCPos = 0; VCCPos < MCID.NumOperands; ++VCCPos, ++VCCI) { - if (ARM::isVpred(MCID.operands()[VCCPos].OperandType) || VCCI == MI.end()) - break; - } - - if (isVectorPredicable(MI)) { - VCCI = MI.insert(VCCI, MCOperand::createImm(VCC)); - ++VCCI; - if (VCC == ARMVCC::None) - VCCI = MI.insert(VCCI, MCOperand::createReg(0)); - else - VCCI = MI.insert(VCCI, MCOperand::createReg(ARM::P0)); - ++VCCI; - VCCI = MI.insert(VCCI, MCOperand::createReg(0)); - ++VCCI; - if (MCID.operands()[VCCPos].OperandType == ARM::OPERAND_VPRED_R) { - int TiedOp = MCID.getOperandConstraint(VCCPos + 3, MCOI::TIED_TO); - assert(TiedOp >= 0 && - "Inactive register in vpred_r is not tied to an output!"); - // Copy the operand to ensure it's not invalidated when MI grows. - MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp))); - } - } else if (VCC != ARMVCC::None) { - Check(S, SoftFail); - } - return S; } diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index dfd896f..8d8066a 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -49,7 +49,7 @@ public: DwarfUsesRelocationsAcrossSections = enable; } - MCSection *getNonexecutableStackSection(MCContext &Ctx) const override { + MCSection *getStackSection(MCContext &Ctx, bool Exec) const override { return nullptr; } }; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index d0dfa47..a94e131 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() { setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETO, MVT::v64f16, Expand); setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand); @@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() { setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETO, MVT::v32f32, Expand); // Boolean vectors. @@ -449,6 +453,7 @@ HexagonTargetLowering::initializeHVXLowering() { // Include cases which are not hander earlier setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v32i1, Custom); setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT}); } @@ -2337,7 +2342,7 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { return ExpandHvxFpToInt(Op, DAG); } -// For vector type v32i1 uint_to_fp to v32f32: +// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32: // R1 = #1, R2 holds the v32i1 param // V1 = vsplat(R1) // V2 = vsplat(R2) @@ -2464,7 +2469,7 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { MVT IntTy = ty(Op.getOperand(0)).getVectorElementType(); MVT FpTy = ResTy.getVectorElementType(); - if (Op.getOpcode() == ISD::UINT_TO_FP) { + if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) { if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1) return LowerHvxPred32ToFp(Op, DAG); if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4cfbfca..7ddf996 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2860,8 +2860,7 @@ static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, EVT ResTy, unsigned first) { unsigned NumElts = ResTy.getVectorNumElements(); - assert(first >= 0 && - first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements()); + assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements()); SmallVector<SDValue, 16> Ops(Node->op_begin() + first, Node->op_begin() + first + NumElts); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 395d2c4..662d3f6 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -629,7 +629,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, - G_FTANH}) + G_FTANH, G_FMODF}) .libcallFor({s32, s64}) .libcallFor(ST.is64Bit(), {s128}); getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP}) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7d4535a..b37b740 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); // If it's not a grouped vector register, it doesn't have subregister, so // the base register is just itself. - if (BaseReg == RISCV::NoRegister) + if (!BaseReg.isValid()) BaseReg = Reg; return BaseReg; } diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index cf6f83a..7f5d0af 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// -def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>; - -def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{ - KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0); - return Known.isNonNegative(); -}]>; - let Predicates = [IsRV64] in { def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. def : LdPat<extloadi16, LH, i32>; @@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>; def : StPat<truncstorei8, SB, GPR, i32>; def : StPat<truncstorei16, SH, GPR, i32>; -def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>; def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; -def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>; def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32), (ADDIW GPR:$rs1, simm12_lo:$imm)>; - -// Use sext if the sign bit of the input is 0. -def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; } let Predicates = [IsRV64, NoStdExtZba] in diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 50649cf..dcce2d2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -533,7 +533,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, - ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP}, + ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF}, MVT::f16, Promote); // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 6a6ead2..cf8d120 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) { // All undefined passthrus should be $noreg: see // RISCVDAGToDAGISel::doPeepholeNoRegPassThru const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef(); + return !UseMO.getReg().isValid() || UseMO.isUndef(); } /// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs. @@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { Register Reg = VLOp.getReg(); // Erase the AVL operand from the instruction. - VLOp.setReg(RISCV::NoRegister); + VLOp.setReg(Register()); VLOp.setIsKill(false); if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); @@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { if (!MO.isReg() || !MO.getReg().isVirtual()) return; Register OldVLReg = MO.getReg(); - MO.setReg(RISCV::NoRegister); + MO.setReg(Register()); if (LIS) LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1e6b04f8..7db4832 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(), /*RestoreAfter=*/false, /*SpAdj=*/0, /*AllowSpill=*/false); - if (TmpGPR != RISCV::NoRegister) + if (TmpGPR.isValid()) RS->setRegUsed(TmpGPR); else { // The case when there is no scavenged register needs special handling. @@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Invalid operand type for VL operand"; return false; } - if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { + if (Op.isReg() && Op.getReg().isValid()) { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); auto *RC = MRI.getRegClass(Op.getReg()); if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index efdbd12..447f05c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1417,9 +1417,9 @@ class SelectQCbi<CondCode Cond, DAGOperand InTyImm, Pseudo OpNode > let Predicates = [HasVendorXqciac, IsRV32] in { def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12_lo:$imm12))), (QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12_lo:$imm12)>; -def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, uimm5gt3:$imm), GPRNoX0:$rs2)), +def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, (i32 uimm5gt3:$imm)), GPRNoX0:$rs2)), (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>; -def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, uimm5gt3:$imm, GPRNoX0:$rs2)), +def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, (i32 uimm5gt3:$imm), GPRNoX0:$rs2)), (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>; } // Predicates = [HasVendorXqciac, IsRV32] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td index 1674c95..1dd7332 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td @@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO, - (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rs2, $rs1"> { let rd = 0; } @@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> // while atomic_store has data, addr class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> : Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)), - (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>; + (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>; let Predicates = [HasStdExtZalasr] in { diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp index f8d33ae..54569b1 100644 --- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp +++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { if (isCompressibleLoad(MI) || isCompressibleStore(MI)) { const MachineOperand &MOImm = MI.getOperand(2); if (!MOImm.isImm()) - return RegImmPair(RISCV::NoRegister, 0); + return RegImmPair(Register(), 0); int64_t Offset = MOImm.getImm(); int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode); @@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) { } } } - return RegImmPair(RISCV::NoRegister, 0); + return RegImmPair(Register(), 0); } // Check all uses after FirstMI of the given register, keeping a vector of diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index ffba284..fdf9a4f 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { // vmv.v.v doesn't have a mask operand, so we may be able to inflate the // register class for the destination and passthru operands e.g. VRNoV0 -> VR MRI->recomputeRegClass(MI.getOperand(0).getReg()); - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) MRI->recomputeRegClass(MI.getOperand(1).getReg()); return true; } @@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { Register FalseReg = MI.getOperand(2).getReg(); if (TruePassthruReg != FalseReg) { // If True's passthru is undef see if we can change it to False - if (TruePassthruReg != RISCV::NoRegister || + if (TruePassthruReg.isValid() || !MRI->hasOneUse(MI.getOperand(3).getReg()) || !ensureDominates(MI.getOperand(2), *True)) return false; @@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { // vmv.v.v doesn't have a mask operand, so we may be able to inflate the // register class for the destination and passthru operands e.g. VRNoV0 -> VR MRI->recomputeRegClass(MI.getOperand(0).getReg()); - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) MRI->recomputeRegClass(MI.getOperand(1).getReg()); return true; } @@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const { if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) { unsigned PassthruOpIdx = MI.getNumExplicitDefs(); if (HasPassthru) { - if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) + if (MI.getOperand(PassthruOpIdx).getReg()) MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg()); } else MI.removeOperand(PassthruOpIdx); @@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A, bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, MachineInstr &Src) const { assert(MO.getParent()->getParent() == Src.getParent()); - if (!MO.isReg() || MO.getReg() == RISCV::NoRegister) + if (!MO.isReg() || !MO.getReg().isValid()) return true; MachineInstr *Def = MRI->getVRegDef(MO.getReg()); @@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) { if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V) return false; - if (MI.getOperand(1).getReg() != RISCV::NoRegister) + if (MI.getOperand(1).getReg().isValid()) return false; // If the input was a pseudo with a policy operand, we can give it a tail @@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { // Src needs to have the same passthru as VMV_V_V MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs()); - if (SrcPassthru.getReg() != RISCV::NoRegister && + if (SrcPassthru.getReg().isValid() && SrcPassthru.getReg() != Passthru.getReg()) return false; @@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { if (SrcPassthru.getReg() != Passthru.getReg()) { SrcPassthru.setReg(Passthru.getReg()); // If Src is masked then its passthru needs to be in VRNoV0. - if (Passthru.getReg() != RISCV::NoRegister) + if (Passthru.getReg().isValid()) MRI->constrainRegClass( Passthru.getReg(), TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI)); diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index 7505507..e8c849e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass { FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType()); FixedVectorType *DstType = cast<FixedVectorType>(GR->findDeducedElementType(Dst)); - assert(DstType->getNumElements() >= SrcType->getNumElements()); + auto dstNumElements = DstType->getNumElements(); + auto srcNumElements = SrcType->getNumElements(); + + // if the element type differs, it is a bitcast. + if (DstType->getElementType() != SrcType->getElementType()) { + // Support bitcast between vectors of different sizes only if + // the total bitwidth is the same. + [[maybe_unused]] auto dstBitWidth = + DstType->getElementType()->getScalarSizeInBits() * dstNumElements; + [[maybe_unused]] auto srcBitWidth = + SrcType->getElementType()->getScalarSizeInBits() * srcNumElements; + assert(dstBitWidth == srcBitWidth && + "Unsupported bitcast between vectors of different sizes."); + + Src = + B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src}); + buildAssignType(B, DstType, Src); + SrcType = DstType; + + StoreInst *SI = B.CreateStore(Src, Dst); + SI->setAlignment(Alignment); + return SI; + } + assert(DstType->getNumElements() >= SrcType->getNumElements()); LoadInst *LI = B.CreateLoad(DstType, Dst); LI->setAlignment(Alignment); Value *OldValues = LI; diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp index 6c19049..024030d 100644 --- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp @@ -206,8 +206,8 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, if (!done) --I; - // skip debug instruction - if (I->isDebugInstr()) + // Skip meta instructions. + if (I->isMetaInstruction()) continue; if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3802506..931a10b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // so prevents folding a load into this instruction or making a copy. const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; - if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) - Mask = UnpackLoMask; - else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) - Mask = UnpackHiMask; + if (!isSingleElementRepeatedMask(Mask)) { + if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) + Mask = UnpackLoMask; + else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) + Mask = UnpackHiMask; + } return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); @@ -58135,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; + // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending + // on the scheduler model. Limit multiple users to AVX+ targets to prevent + // introducing extra register moves. + if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL)) + if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode())) + return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(), + Op0, 1, DAG); + // Canonicalize hidden LEA pattern: // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) // iff c < 4 |