diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP3Instructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 2e7f25b..b6f9568 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -224,6 +224,12 @@ defm V_ALIGNBIT_B32 : VOP3Inst_t16_with_profiles <"v_alignbit_b32", fshr, null_frag>; defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>; + +// In gfx9 and 10, opsel is allowed for V_ALIGNBIT_B32 and V_ALIGNBYTE_B32. +// Hardware uses opsel[1:0] to byte-select src2. Other opsel bits are ignored. +defm V_ALIGNBIT_B32_opsel : VOP3Inst <"v_alignbit_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>; +defm V_ALIGNBYTE_B32_opsel : VOP3Inst <"v_alignbyte_b32_opsel", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_OPSEL>>; + let True16Predicate = UseRealTrue16Insts in defm V_ALIGNBYTE_B32_t16 : VOP3Inst <"v_alignbyte_b32_t16", VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>>; let True16Predicate = UseFakeTrue16Insts in @@ -265,6 +271,16 @@ let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in { } // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1 } // End isReMaterializable = 1 +let SubtargetPredicate = isGFX9GFX10 in +def : GCNPat < +(i32 (int_amdgcn_alignbyte (i32 (VOP3OpSelMods i32:$src0, i32:$src0_modifiers)), + (i32 (VOP3OpSelMods i32:$src1, i32:$src1_modifiers)), + (i32 (VOP3OpSelMods i32:$src2, i32:$src2_modifiers)))), +(V_ALIGNBYTE_B32_opsel_e64 i32:$src0_modifiers, VSrc_b32:$src0, + i32:$src1_modifiers, VSrc_b32:$src1, + i32:$src2_modifiers, VGPR_32:$src2) +>; + let True16Predicate = UseFakeTrue16Insts in def : GCNPat < (i32 (int_amdgcn_alignbyte (i32 (VOP3OpSelMods i32:$src0, i32:$src0_modifiers)), @@ -1902,6 +1918,7 @@ let AssemblerPredicate = isGFX11Plus in { // These instructions differ from GFX12 variant by supporting DPP: defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>; +defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>; //===----------------------------------------------------------------------===// // GFX10. @@ -1954,6 +1971,9 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" +defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx10_with_name<0x14e, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">; +defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx10_with_name<0x14f, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">; + defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { @@ -2104,8 +2124,8 @@ defm V_BFI_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14a>; defm V_FMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x14b>; defm V_FMA_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x14c>; defm V_LERP_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x14d>; -defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14e>; -defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14f>; +defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7<0x14e>; +defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7<0x14f>; defm V_MULLIT_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x150>; defm V_MIN3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x151>; defm V_MIN3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x152>; @@ -2248,6 +2268,17 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0> } } +// Instructions such as v_alignbyte_b32 allows op_sel in gfx9, but not in vi. +// The following is created to support that. +multiclass VOP3OpSel_Real_gfx9_with_name<bits<10> op, string opName, string AsmName> { + defvar psName = opName#"_e64"; + def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(psName), SIEncodingFamily.VI>, // note: encoding family is VI + VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(psName).Pfl> { + VOP3_Pseudo ps = !cast<VOP3_Pseudo>(psName); + let AsmString = AsmName # ps.AsmOperands; + } +} + } // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; @@ -2267,8 +2298,10 @@ defm V_BFI_B32 : VOP3_Real_vi <0x1ca>; defm V_FMA_F32 : VOP3_Real_vi <0x1cb>; defm V_FMA_F64 : VOP3_Real_vi <0x1cc>; defm V_LERP_U8 : VOP3_Real_vi <0x1cd>; +let SubtargetPredicate = isGFX8Only in { defm V_ALIGNBIT_B32 : VOP3_Real_vi <0x1ce>; defm V_ALIGNBYTE_B32 : VOP3_Real_vi <0x1cf>; +} defm V_MIN3_F32 : VOP3_Real_vi <0x1d0>; defm V_MIN3_I32 : VOP3_Real_vi <0x1d1>; defm V_MIN3_U32 : VOP3_Real_vi <0x1d2>; @@ -2313,6 +2346,9 @@ defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16" defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">; defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">; +defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx9_with_name <0x1ce, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">; +defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx9_with_name <0x1cf, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">; + defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">; defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">; defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">; |