diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index ab7d340..392bf42 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1307,9 +1307,16 @@ let PrintMethod = "printBitOp3" in def BitOp3 : NamedIntOperand<"bitop3">; def bitop3_0 : DefaultOperand<BitOp3, 0>; +def MatrixAFMT : CustomOperand<i32, 1, "MatrixAFMT">; +def MatrixBFMT : CustomOperand<i32, 1, "MatrixBFMT">; + def MatrixAReuse : NamedBitOperand<"matrix_a_reuse">; def MatrixBReuse : NamedBitOperand<"matrix_b_reuse">; +def ScaleSel : NamedIntOperand<"scale_sel"> { + let Validator = "isUInt<3>"; +} + class KImmFPOperand<ValueType vt> : ImmOperand<vt> { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_KIMM"#vt.Size; @@ -1659,6 +1666,8 @@ def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">; def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">; def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">; +def VOP3PMadMixBF16ModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16ModsExt">; +def VOP3PMadMixBF16Mods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16Mods">; def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">; def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">; @@ -1765,6 +1774,7 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> { !eq(VT.Size, 256) : VOPDstOperand<VReg_256>, !eq(VT.Size, 192) : VOPDstOperand<VReg_192>, !eq(VT.Size, 128) : VOPDstOperand<VReg_128>, + !eq(VT.Size, 96) : VOPDstOperand<VReg_96>, !eq(VT.Size, 64) : VOPDstOperand<VReg_64>, !eq(VT.Size, 32) : VOPDstOperand<VGPR_32>, !eq(VT.Size, 16) : op16, @@ -1882,6 +1892,7 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> { !eq(VT, v4bf16) : AVSrc_64, !eq(VT.Size, 1024) : VRegSrc_1024, !eq(VT.Size, 512) : VRegSrc_512, + !eq(VT.Size, 384) : VRegSrc_384, !eq(VT.Size, 256) : VRegSrc_256, !eq(VT.Size, 192) : VRegSrc_192, !eq(VT.Size, 128) : VRegSrc_128, @@ -1894,6 +1905,7 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> { class getVOP3VRegSrcForVT<ValueType VT> { RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VRegSrc_1024, !eq(VT.Size, 512) : VRegSrc_512, + !eq(VT.Size, 384) : VRegSrc_384, !eq(VT.Size, 256) : VRegSrc_256, !eq(VT.Size, 192) : VRegSrc_192, !eq(VT.Size, 128) : VRegSrc_128, @@ -1913,6 +1925,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> { !eq(VT, v2f16) : VCSrc_v2f16, !eq(VT, v2bf16) : VCSrc_v2bf16, !eq(VT, f32) : VCSrc_f32, + !eq(VT, v2i32) : VCSrc_v2b32, 1 : VCSrc_b32); } @@ -2666,6 +2679,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { HasOMod); field bit HasNeg = HasModifiers; field bit HasMatrixReuse = 0; + field bit HasMatrixFMT = 0; field bit HasSrc0Mods = HasModifiers; field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0); @@ -2850,15 +2864,19 @@ def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>; +def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>; +def VOP_I16_V2F16 : VOPProfile<[i16, v2f16, untyped, untyped]>; def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>; def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>; +def VOP_BF16_BF16_BF16 : VOPProfile <[bf16, bf16, bf16, untyped]>; def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; +def VOP_BF16_BF16_BF16_BF16 : VOPProfile <[bf16, bf16, bf16, bf16, untyped]>; def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; @@ -2866,10 +2884,12 @@ def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>; def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>; def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>; +def VOP_V2BF16_V2BF16_V2BF16 : VOPProfile <[v2bf16, v2bf16, v2bf16, untyped]>; def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>; def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>; def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; +def VOP_V2BF16_V2BF16_V2BF16_V2BF16 : VOPProfile <[v2bf16, v2bf16, v2bf16, v2bf16]>; def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>; def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>; @@ -2905,12 +2925,16 @@ def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; def VOP_I16_F32_F32 : VOPProfile <[i16, f32, f32, untyped]>; def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; +def VOP_I64_I64_I64_ARITH : VOPProfile <[i64, i64, i64, untyped], /*EnableClamp=*/1>; def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; +def VOP_F32_BF16_BF16_BF16 : VOPProfile <[f32, bf16, bf16, bf16]>; def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>; def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>; def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>; def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>; +def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>; +def VOP_V2F16_F32_F32_I32 : VOPProfile <[v2f16, f32, f32, i32]>; def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>; def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>; def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>; @@ -2926,11 +2950,32 @@ def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>; def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>; def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>; def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>; +def VOP_V16F16_V3I32_I32 : VOPProfile<[v16f16, v3i32, i32, untyped]>; +def VOP_V16BF16_V3I32_I32 : VOPProfile<[v16bf16, v3i32, i32, untyped]>; +def VOP_V8F16_V2I32_I32 : VOPProfile<[v8f16, v2i32, i32, untyped]>; +def VOP_V8BF16_V2I32_I32 : VOPProfile<[v8bf16, v2i32, i32, untyped]>; +def VOP_V8F16_I32_I32 : VOPProfile<[v8f16, i32, i32, untyped]>; +def VOP_V8BF16_I32_I32 : VOPProfile<[v8bf16, i32, i32, untyped]>; +def VOP_V16F32_V3I32_I32 : VOPProfile<[v16f32, v3i32, i32, untyped]>; +def VOP_V8F32_V2I32_I32 : VOPProfile<[v8f32, v2i32, i32, untyped]>; +def VOP_V8F32_I32_I32 : VOPProfile<[v8f32, i32, i32, untyped]>; +def VOP_V2I32_V8BF16_F32 : VOPProfile<[v2i32, v8bf16, f32, untyped]>; +def VOP_V2I32_V8F16_F32 : VOPProfile<[v2i32, v8f16, f32, untyped]>; +def VOP_V2I32_V8F32_F32 : VOPProfile<[v2i32, v8f32, f32, untyped]>; +def VOP_I32_V8F32_F32 : VOPProfile<[i32, v8f32, f32, untyped]>; +def VOP_I32_V8F16_F32 : VOPProfile<[i32, v8f16, f32, untyped]>; +def VOP_I32_V8BF16_F32 : VOPProfile<[i32, v8bf16, f32, untyped]>; def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>; def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>; def VOP_V6I32_V32F16_I32_F32 : VOPProfile<[v6i32, v32f16, i32, f32]>; def VOP_V6I32_V32F32_I32_F32 : VOPProfile<[v6i32, v32f32, i32, f32]>; +def VOP_V2I32_V8BF16_I32_F32 : VOPProfile<[v2i32, v8bf16, i32, f32]>; +def VOP_V2I32_V8F16_I32_F32 : VOPProfile<[v2i32, v8f16, i32, f32]>; +def VOP_V2I32_V8F32_I32_F32 : VOPProfile<[v2i32, v8f32, i32, f32]>; +def VOP_I32_V8F32_I32_F32 : VOPProfile<[i32, v8f32, i32, f32]>; +def VOP_I32_V8F16_I32_F32 : VOPProfile<[i32, v8f16, i32, f32]>; +def VOP_I32_V8BF16_I32_F32 : VOPProfile<[i32, v8bf16, i32, f32]>; def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; |