aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/mul_int24.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/mul_int24.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/mul_int24.ll129
1 files changed, 60 insertions, 69 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index 3d9c2a2..10d4eb0 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -10,46 +10,43 @@ define amdgpu_kernel void @test_smul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b
; SI-LABEL: test_smul24_i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_bfe_i32 s2, s2, 0x180000
-; SI-NEXT: s_bfe_i32 s3, s3, 0x180000
-; SI-NEXT: s_mul_i32 s2, s2, s3
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_mov_b32 s4, s0
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: v_mov_b32_e32 v0, s2
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_bfe_i32 s2, s4, 0x180000
+; SI-NEXT: s_bfe_i32 s4, s5, 0x180000
+; SI-NEXT: s_mul_i32 s4, s2, s4
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_smul24_i32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_mov_b32 s4, s0
-; VI-NEXT: s_mov_b32 s5, s1
-; VI-NEXT: s_bfe_i32 s0, s2, 0x180000
-; VI-NEXT: s_bfe_i32 s1, s3, 0x180000
-; VI-NEXT: s_mul_i32 s0, s0, s1
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_mov_b64 s[4:5], s[2:3]
+; VI-NEXT: s_bfe_i32 s4, s4, 0x180000
+; VI-NEXT: s_bfe_i32 s5, s5, 0x180000
+; VI-NEXT: s_mul_i32 s4, s4, s5
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_smul24_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: s_mov_b32 s7, 0xf000
-; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_bfe_i32 s0, s2, 0x180000
-; GFX9-NEXT: s_bfe_i32 s1, s3, 0x180000
-; GFX9-NEXT: s_mul_i32 s0, s0, s1
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX9-NEXT: s_bfe_i32 s4, s4, 0x180000
+; GFX9-NEXT: s_bfe_i32 s5, s5, 0x180000
+; GFX9-NEXT: s_mul_i32 s4, s4, s5
+; GFX9-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-NEXT: s_mov_b32 s2, -1
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
; EG-LABEL: test_smul24_i32:
@@ -127,16 +124,15 @@ define amdgpu_kernel void @test_smulhi24_i64(ptr addrspace(1) %out, i32 %a, i32
; GFX9-LABEL: test_smulhi24_i64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-NEXT: s_mov_b32 s7, 0xf000
-; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_bfe_i32 s0, s2, 0x180000
-; GFX9-NEXT: s_bfe_i32 s1, s3, 0x180000
-; GFX9-NEXT: s_mul_hi_i32 s0, s0, s1
-; GFX9-NEXT: v_mov_b32_e32 v0, s0
-; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX9-NEXT: s_bfe_i32 s4, s4, 0x180000
+; GFX9-NEXT: s_bfe_i32 s5, s5, 0x180000
+; GFX9-NEXT: s_mul_hi_i32 s4, s4, s5
+; GFX9-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-NEXT: s_mov_b32 s2, -1
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
; EG-LABEL: test_smulhi24_i64:
@@ -464,29 +460,26 @@ define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b
; SI-LABEL: test_smul24_i33:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s4, s0
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_bfe_i32 s0, s8, 0x180000
-; SI-NEXT: s_bfe_i32 s1, s2, 0x180000
-; SI-NEXT: v_mov_b32_e32 v0, s0
-; SI-NEXT: s_mul_i32 s0, s1, s0
-; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s1, v0
-; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_bfe_i32 s4, s4, 0x180000
+; SI-NEXT: s_bfe_i32 s5, s6, 0x180000
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: s_mul_i32 s4, s5, s4
+; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s5, v0
+; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
; SI-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
-; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_smul24_i33:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bfe_i32 s2, s2, 0x180000
; VI-NEXT: s_bfe_i32 s3, s4, 0x180000
@@ -494,10 +487,10 @@ define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b
; VI-NEXT: v_mul_hi_i32_i24_e32 v1, s2, v0
; VI-NEXT: v_mul_i32_i24_e32 v0, s2, v0
; VI-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
-; VI-NEXT: s_mov_b32 s5, s1
-; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_smul24_i33:
@@ -577,31 +570,29 @@ define amdgpu_kernel void @test_smulhi24_i33(ptr addrspace(1) %out, i33 %a, i33
; SI-LABEL: test_smulhi24_i33:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s4, s0
-; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: v_mov_b32_e32 v0, s8
-; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s2, v0
+; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s6, v0
; SI-NEXT: v_and_b32_e32 v0, 1, v0
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_smulhi24_i33:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
-; VI-NEXT: s_mov_b32 s7, 0xf000
-; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_mov_b32 s4, s0
-; VI-NEXT: v_mov_b32_e32 v0, s8
-; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s2, v0
-; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_mov_b64 s[6:7], s[2:3]
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s6, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_and_b32_e32 v0, 1, v0
-; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_smulhi24_i33: