aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll63
1 files changed, 32 insertions, 31 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 5fb50d0..da08f4f 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -3755,42 +3755,44 @@ define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) #1 {
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v13
; CI-NEXT: v_cvt_f16_f32_e32 v13, v22
; CI-NEXT: v_or_b32_e32 v10, v14, v10
+; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:4
+; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32
; CI-NEXT: v_lshlrev_b32_e32 v17, 16, v17
-; CI-NEXT: v_or_b32_e32 v17, v18, v17
; CI-NEXT: v_cvt_f32_f16_e32 v13, v13
+; CI-NEXT: v_or_b32_e32 v17, v18, v17
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:16
; CI-NEXT: v_cvt_f16_f32_e32 v22, v27
-; CI-NEXT: v_cvt_f16_f32_e32 v19, v19
; CI-NEXT: v_cvt_f16_f32_e32 v13, v13
+; CI-NEXT: v_cvt_f16_f32_e32 v19, v19
; CI-NEXT: v_cvt_f32_f16_e32 v22, v22
-; CI-NEXT: v_lshlrev_b32_e32 v19, 16, v19
; CI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; CI-NEXT: v_or_b32_e32 v13, v16, v13
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:12
; CI-NEXT: v_cvt_f16_f32_e32 v22, v22
+; CI-NEXT: v_lshlrev_b32_e32 v19, 16, v19
; CI-NEXT: v_or_b32_e32 v19, v20, v19
; CI-NEXT: v_lshlrev_b32_e32 v20, 16, v21
; CI-NEXT: v_cvt_f16_f32_e32 v21, v30
; CI-NEXT: v_or_b32_e32 v20, v22, v20
; CI-NEXT: v_cvt_f16_f32_e32 v22, v29
-; CI-NEXT: s_waitcnt vmcnt(6)
+; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: v_cvt_f16_f32_e32 v11, v11
; CI-NEXT: v_cvt_f32_f16_e32 v21, v21
; CI-NEXT: v_cvt_f32_f16_e32 v22, v22
; CI-NEXT: v_cvt_f32_f16_e32 v11, v11
-; CI-NEXT: v_cvt_f16_f32_e32 v21, v21
-; CI-NEXT: s_waitcnt vmcnt(5)
+; CI-NEXT: s_waitcnt vmcnt(7)
; CI-NEXT: v_cvt_f16_f32_e32 v12, v12
+; CI-NEXT: v_cvt_f16_f32_e32 v21, v21
; CI-NEXT: v_cvt_f16_f32_e32 v22, v22
; CI-NEXT: v_cvt_f16_f32_e32 v11, v11
-; CI-NEXT: v_lshlrev_b32_e32 v21, 16, v21
; CI-NEXT: v_cvt_f32_f16_e32 v12, v12
+; CI-NEXT: v_lshlrev_b32_e32 v21, 16, v21
; CI-NEXT: v_or_b32_e32 v21, v22, v21
; CI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; CI-NEXT: v_cvt_f16_f32_e32 v12, v12
-; CI-NEXT: s_waitcnt vmcnt(3)
+; CI-NEXT: s_waitcnt vmcnt(5)
; CI-NEXT: v_cvt_f16_f32_e32 v31, v31
-; CI-NEXT: s_waitcnt vmcnt(2)
+; CI-NEXT: s_waitcnt vmcnt(4)
; CI-NEXT: v_cvt_f16_f32_e32 v32, v32
; CI-NEXT: v_cvt_f32_f16_e32 v31, v31
; CI-NEXT: v_cvt_f32_f16_e32 v32, v32
@@ -3802,6 +3804,27 @@ define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) #1 {
; CI-NEXT: buffer_store_dword v31, v32, s[0:3], 0 offen
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
+; CI-NEXT: s_waitcnt vmcnt(6)
+; CI-NEXT: v_cvt_f16_f32_e32 v14, v14
+; CI-NEXT: s_waitcnt vmcnt(5)
+; CI-NEXT: v_cvt_f16_f32_e32 v15, v15
+; CI-NEXT: v_cvt_f32_f16_e32 v14, v14
+; CI-NEXT: v_cvt_f32_f16_e32 v15, v15
+; CI-NEXT: v_cvt_f16_f32_e32 v14, v14
+; CI-NEXT: v_cvt_f16_f32_e32 v15, v15
+; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
+; CI-NEXT: v_or_b32_e32 v14, v15, v14
+; CI-NEXT: s_waitcnt vmcnt(3)
+; CI-NEXT: v_cvt_f16_f32_e32 v15, v16
+; CI-NEXT: v_cvt_f16_f32_e32 v16, v18
+; CI-NEXT: v_cvt_f32_f16_e32 v15, v15
+; CI-NEXT: v_cvt_f32_f16_e32 v16, v16
+; CI-NEXT: v_cvt_f16_f32_e32 v15, v15
+; CI-NEXT: v_cvt_f16_f32_e32 v16, v16
+; CI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
+; CI-NEXT: v_or_b32_e32 v12, v12, v15
+; CI-NEXT: v_add_i32_e32 v15, vcc, 0x44, v0
+; CI-NEXT: v_or_b32_e32 v11, v16, v11
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: v_cvt_f16_f32_e32 v31, v31
; CI-NEXT: s_waitcnt vmcnt(0)
@@ -3968,28 +3991,6 @@ define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) #1 {
; CI-NEXT: v_or_b32_e32 v31, v32, v31
; CI-NEXT: v_add_i32_e32 v32, vcc, 0x48, v0
; CI-NEXT: buffer_store_dword v31, v32, s[0:3], 0 offen
-; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:4
-; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32
-; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: v_cvt_f16_f32_e32 v14, v14
-; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: v_cvt_f16_f32_e32 v15, v15
-; CI-NEXT: v_cvt_f32_f16_e32 v14, v14
-; CI-NEXT: v_cvt_f32_f16_e32 v15, v15
-; CI-NEXT: v_cvt_f16_f32_e32 v14, v14
-; CI-NEXT: v_cvt_f16_f32_e32 v15, v15
-; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
-; CI-NEXT: v_or_b32_e32 v14, v15, v14
-; CI-NEXT: v_cvt_f16_f32_e32 v15, v16
-; CI-NEXT: v_cvt_f16_f32_e32 v16, v18
-; CI-NEXT: v_cvt_f32_f16_e32 v15, v15
-; CI-NEXT: v_cvt_f32_f16_e32 v16, v16
-; CI-NEXT: v_cvt_f16_f32_e32 v15, v15
-; CI-NEXT: v_cvt_f16_f32_e32 v16, v16
-; CI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
-; CI-NEXT: v_or_b32_e32 v12, v12, v15
-; CI-NEXT: v_or_b32_e32 v11, v16, v11
-; CI-NEXT: v_add_i32_e32 v15, vcc, 0x44, v0
; CI-NEXT: buffer_store_dword v11, v15, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v11, vcc, 64, v0
; CI-NEXT: buffer_store_dword v12, v11, s[0:3], 0 offen