aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2024-11-20 10:52:38 -0800
committerGitHub <noreply@github.com>2024-11-20 10:52:38 -0800
commit201f4f6bcccf3f0ac0c9d3e8c484fb2c53bfb016 (patch)
tree41d78be7dbb00134335c08739596adf4a9a02e09 /llvm/lib
parentb170ab21c3cd16c1fc1917d91092b221b4163442 (diff)
downloadllvm-201f4f6bcccf3f0ac0c9d3e8c484fb2c53bfb016.zip
llvm-201f4f6bcccf3f0ac0c9d3e8c484fb2c53bfb016.tar.gz
llvm-201f4f6bcccf3f0ac0c9d3e8c484fb2c53bfb016.tar.bz2
AMDGPU: Add v_mfma_ld_scale_b32 for gfx950 (#116722)
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td26
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td11
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td6
3 files changed, 30 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2079b34..d2024cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1991,13 +1991,14 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
+ bit HasNeg,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
- dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+ dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins));
dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
dag ret = !con(base, vop3pFields);
@@ -2191,22 +2192,22 @@ class getAsmVOPDPart <int NumSrcArgs, string XorY> {
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
-class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg,
bit HasClamp, bit HasOpSel> {
- string dst = "$vdst";
- string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string dst = !if(HasDst, "$vdst"# !if(!gt(NumSrcArgs, 0), ",", ""), "");
+ string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1",
" $src1,"));
string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
- string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+ string mods = !if(HasNeg, "$neg_lo$neg_hi", "");
string clamp = !if(HasClamp, "$clamp", "");
string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
// Each modifier is printed as an array of bits for each operand, so
// all operands are printed as part of src0_modifiers.
- string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
+ string ret = dst#src0#src1#src2#opsel#mods#clamp;
}
// FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
@@ -2267,7 +2268,7 @@ class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
bit HasOpSel, bit HasOMod, bit IsVOP3P,
- bit HasModifiers, bit Src0HasMods,
+ bit HasNeg, bit Src0HasMods,
bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
bit HasByteSel = 0> {
string dst = !if(HasDst,
@@ -2294,7 +2295,7 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
string bytesel = !if(HasByteSel, "$byte_sel", "");
string 3PMods = !if(IsVOP3P,
!if(HasOpSel, "$op_sel_hi", "")
- #!if(HasModifiers, "$neg_lo$neg_hi", ""),
+ #!if(HasNeg, "$neg_lo$neg_hi", ""),
"");
string clamp = !if(HasClamp, "$clamp", "");
string omod = !if(HasOMod, "$omod", "");
@@ -2554,6 +2555,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
isModifierType<Src1VT>.ret,
isModifierType<Src2VT>.ret,
HasOMod);
+ field bit HasNeg = HasModifiers;
field bit HasSrc0Mods = HasModifiers;
field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2589,7 +2591,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
HasClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs, HasClamp, HasOpSel,
+ NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOMod,
@@ -2607,7 +2609,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
- Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
+ Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
@@ -2635,10 +2637,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
// the asm operand name via this HasModifiers flag
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
- HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
+ HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers,
HasModifiers, DstVT, IsFP8ByteSel>.ret;
field string Asm64 = AsmVOP3Base;
- field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
+ field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
HasClamp,
HasOMod,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 661c6dd..876d4e1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -20,6 +20,11 @@ class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
let AsmVOP3Base = AsmVOP3P;
}
+def VOP_MFMA_LD_SCALE : VOP3P_Profile<VOPProfile<[untyped, i32, i32, untyped]>, VOP3P_LD_SCALE> {
+ let HasModifiers = 1;
+ let HasNeg = 0;
+}
+
// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
@@ -750,6 +755,10 @@ defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16
defm V_MFMA_F32_32X32X16_BF16 : MAIInst<"v_mfma_f32_32x32x16bf16", "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
}
+let SubtargetPredicate = HasGFX950Insts in {
+defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
+}
+
let SubtargetPredicate = isGFX90APlus in {
let is_gfx940_xdl = 1 in {
defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
@@ -1787,6 +1796,8 @@ defm V_MFMA_F32_16X16X32_F16 : VOP3P_Real_MFMA_gfx950 <0x54, "v_mfma_f32_16x
defm V_MFMA_F32_32X32X16_F16 : VOP3P_Real_MFMA_gfx950 <0x55, "v_mfma_f32_32x32x16_f16">;
defm V_MFMA_F32_32X32X16_BF16 : VOP3P_Real_MFMA_gfx950 <0x37, "v_mfma_f32_32x32x16_bf16">;
+defm V_MFMA_LD_SCALE_B32 : VOP3P_Real_vi <0x2c>;
+
defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 1be434c..a6e6ada 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -423,7 +423,7 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
bits<2> index_key_8bit;
bits<1> index_key_16bit;
- let Inst{7-0} = vdst;
+ let Inst{7-0} = !if(P.HasDst, vdst, 0);
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
@@ -1365,6 +1365,10 @@ def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
+// Packed is misleading, but it enables the appropriate op_sel
+// modifiers.
+def VOP3P_LD_SCALE : VOP3Features<0, 1, 1, 0>;
+
class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);