aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/spillv16.ll204
2 files changed, 206 insertions, 3 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll b/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll
index c552f9d..88a51e9 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll
+++ b/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll
@@ -1,10 +1,13 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --stress-regalloc=10 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --stress-regalloc=10 < %s | FileCheck -check-prefix=GCN %s
+; Rematerialization test for fp64 constants (w/ intentionally high register pressure).
+; Check to make sure we have at least six constant MOVs, not necessarily consecutive, inside the loop.
+
; GCN-LABEL: {{^}}test_remat_sgpr:
; GCN-NOT: v_writelane_b32
-; GCN-COUNT-4: s_mov_b32 s{{[0-9]+}}, 0x
; GCN: {{^}}[[LOOP:.LBB[0-9_]+]]:
+; GCN-COUNT-6: {{s_mov_b32|v_mov_b32_e32}} {{[sv]}}{{[0-9]+}}, 0x
; GCN-NOT: v_writelane_b32
; GCN: s_cbranch_{{[^ ]+}} [[LOOP]]
; GCN: .sgpr_spill_count: 0
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll
index 2d54ac8..9686c9d 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.ll
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16,+d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX12-TRUE16,GFX12-TRUE16-D16W32
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16,-d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX12-TRUE16,GFX12-TRUE16-D16W16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-TRUE16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-FAKE16
@@ -35,6 +37,26 @@ define void @spill_i16_alu() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_i16_alu:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-TRUE16-LABEL: spill_i16_alu:
; GFX1250-TRUE16: ; %bb.0: ; %entry
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -126,6 +148,56 @@ define void @spill_i16_alu_two_vals() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-D16W32-LABEL: spill_i16_alu_two_vals:
+; GFX12-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX12-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX12-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-D16W32-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-D16W16-LABEL: spill_i16_alu_two_vals:
+; GFX12-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX12-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX12-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX12-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX12-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-D16W16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
; GFX1250-TRUE16: ; %bb.0: ; %entry
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -223,6 +295,25 @@ define void @spill_i16() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_i16:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-LABEL: spill_i16:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -282,6 +373,25 @@ define void @spill_half() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_half:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-LABEL: spill_half:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -341,6 +451,25 @@ define void @spill_i16_from_v2i16() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_i16_from_v2i16:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-LABEL: spill_i16_from_v2i16:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -414,13 +543,39 @@ define void @spill_2xi16_from_v2i16() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_2xi16_from_v2i16:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
; GFX1250-TRUE16: ; %bb.0: ; %entry
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:12
; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
@@ -444,7 +599,7 @@ define void @spill_2xi16_from_v2i16() {
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8
; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
@@ -520,6 +675,32 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
; GFX1250-TRUE16: ; %bb.0: ; %entry
; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -595,6 +776,25 @@ define void @spill_v2i16() {
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
+; GFX12-TRUE16-LABEL: spill_v2i16:
+; GFX12-TRUE16: ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX12-TRUE16-NEXT: ;;#ASMSTART
+; GFX12-TRUE16-NEXT: ;;#ASMEND
+; GFX12-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1250-LABEL: spill_v2i16:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0