aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <rampitec@users.noreply.github.com>2024-02-05 09:35:01 -0800
committerGitHub <noreply@github.com>2024-02-05 09:35:01 -0800
commitd0b5d32ce6a6287ddab96b028db534cc1bd9a929 (patch)
tree4ec9307f90861f6ca71f16caef4eef9608d54e8d
parentb4c7152eb4f7971c111e3e2f60b55892def58d5d (diff)
downloadllvm-d0b5d32ce6a6287ddab96b028db534cc1bd9a929.zip
llvm-d0b5d32ce6a6287ddab96b028db534cc1bd9a929.tar.gz
llvm-d0b5d32ce6a6287ddab96b028db534cc1bd9a929.tar.bz2
[AMDGPU] Fixed byte_sel of v_cvt_f32_bf8/v_cvt_f32_fp8 (#80502)
Opsel bits are swapped. Actual byte select table: Byte OPSEL 0 0 1 2 2 1 3 3
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td6
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll8
3 files changed, 8 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 920c220..58b67b2 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -668,10 +668,8 @@ class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
(f32 (node i32:$src, index)),
!if (index,
- (inst_e64 !if(index{0},
- !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1),
- SRCMODS.OP_SEL_0),
- !if(index{1}, SRCMODS.OP_SEL_1, 0)),
+ (inst_e64 !or(!if(index{0}, SRCMODS.OP_SEL_1, 0),
+ !if(index{1}, SRCMODS.OP_SEL_0, 0)),
$src, 0),
(inst_e32 $src))
>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
index f49fec6..e21d610 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
@@ -16,7 +16,7 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) {
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
+; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
; GFX12-NEXT: ; return to shader part epilog
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 1)
@@ -28,7 +28,7 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) {
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
+; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
; GFX12-NEXT: ; return to shader part epilog
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %a, i32 228, i32 15, i32 15, i1 1)
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %tmp0, i32 2)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index 17b1fcf..f915fa8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -45,7 +45,7 @@ define float @test_cvt_f32_bf8_byte1(i32 %a) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
+; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
ret float %ret
@@ -65,7 +65,7 @@ define float @test_cvt_f32_bf8_byte2(i32 %a) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[0,1]
+; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 op_sel:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
ret float %ret
@@ -125,7 +125,7 @@ define float @test_cvt_f32_fp8_byte1(i32 %a) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[1,0]
+; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
ret float %ret
@@ -145,7 +145,7 @@ define float @test_cvt_f32_fp8_byte2(i32 %a) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[0,1]
+; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 op_sel:[1,0]
; GFX12-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
ret float %ret