aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorShilei Tian <i@tianshilei.me>2025-07-18 13:08:50 -0400
committerGitHub <noreply@github.com>2025-07-18 13:08:50 -0400
commit2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f (patch)
tree0bb6294e919285d81a93b63644490dec1706c00e /llvm/lib/Target
parente11d28faee10dfb5ae6b8aaadadfd2ea1a2a446a (diff)
downloadllvm-2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f.zip
llvm-2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f.tar.gz
llvm-2c50e4cac2c50dbbc9eb3ed78bc0178bfa26d23f.tar.bz2
[AMDGPU] Add support for `v_sat_pk4_i4_[i8,u8]` on gfx1250 (#149528)
Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com> Co-authored-by: Foad, Jay <Jay.Foad@amd.com>
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td1
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td5
3 files changed, 8 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index cbbb57c..bf2f37b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4558,6 +4558,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_u16:
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
+ case Intrinsic::amdgcn_sat_pk4_i4_i8:
+ case Intrinsic::amdgcn_sat_pk4_u4_u8:
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ab7d340..9e1951e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2850,6 +2850,7 @@ def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>;
+def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 80eb5d8..f621f85 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -803,6 +803,9 @@ let SubtargetPredicate = isGFX1250Plus in {
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>;
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>;
}
+
+ defm V_SAT_PK4_I4_I8 : VOP1Inst_t16<"v_sat_pk4_i4_i8", VOP1_I16_I32, int_amdgcn_sat_pk4_i4_i8>;
+ defm V_SAT_PK4_U4_U8 : VOP1Inst_t16<"v_sat_pk4_u4_u8", VOP1_I16_I32, int_amdgcn_sat_pk4_u4_u8>;
} // End SubtargetPredicate = isGFX1250Plus
let SubtargetPredicate = isGFX10Plus in {
@@ -1158,6 +1161,8 @@ defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
+defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;