diff options
author | Lu Weining <luweining@loongson.cn> | 2024-05-29 17:42:41 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-29 17:42:41 +0800 |
commit | 5553f27d5a45e702415fa2f91d842bf4a1f4a8b5 (patch) | |
tree | 9a4cc4a89021fb6ec3018cee2df403e4f3183bda | |
parent | 1594cebedd60a08f408e3fa975116ef4db86bf9b (diff) | |
download | llvm-5553f27d5a45e702415fa2f91d842bf4a1f4a8b5.zip llvm-5553f27d5a45e702415fa2f91d842bf4a1f4a8b5.tar.gz llvm-5553f27d5a45e702415fa2f91d842bf4a1f4a8b5.tar.bz2 |
[AMDGPU][test] Fix the wrong triples in lower-work-group-id-intrinsics-{hsa,pal}.ll. NFC (#93501)
- hsa -> amdhsa
- Use amdgcn-amd-amd{hsa,pal} for lower-work-group-id-intrinsics-{hsa,pal}.ll respectively
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll | 170 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll | 65 |
2 files changed, 88 insertions, 147 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 9547f08..1429251 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_kernel void @workgroup_ids_kernel() { ; GFX9-LABEL: workgroup_ids_kernel: ; GFX9: ; %bb.0: ; %.entry -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -72,27 +72,20 @@ define amdgpu_kernel void @workgroup_ids_kernel() { define amdgpu_kernel void @caller() { ; GFX9-SDAG-LABEL: caller: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s7 -; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-SDAG-NEXT: s_add_u32 s8, s2, 36 -; GFX9-SDAG-NEXT: s_addc_u32 s9, s3, 0 -; GFX9-SDAG-NEXT: s_getpc_b64 s[2:3] -; GFX9-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4 -; GFX9-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12 -; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[2:3], 0x0 -; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9-SDAG-NEXT: s_add_u32 flat_scratch_lo, s10, s13 +; GFX9-SDAG-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s13 +; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9-SDAG-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[8:9], 0x0 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-SDAG-NEXT: s_mov_b32 s12, s6 -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[14:15] @@ -100,27 +93,20 @@ define amdgpu_kernel void @caller() { ; ; GFX9-GISEL-LABEL: caller: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s7 -; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-GISEL-NEXT: s_add_u32 s8, s2, 36 -; GFX9-GISEL-NEXT: s_addc_u32 s9, s3, 0 -; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x0 +; GFX9-GISEL-NEXT: s_add_u32 flat_scratch_lo, s10, s13 +; GFX9-GISEL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s13 +; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9-GISEL-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[8:9], 0x0 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX9-GISEL-NEXT: s_mov_b32 s12, s6 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[14:15] @@ -128,81 +114,61 @@ define amdgpu_kernel void @caller() { ; ; GFX9ARCH-SDAG-LABEL: caller: ; GFX9ARCH-SDAG: ; %bb.0: -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s6 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0 -; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s2, 36 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s3, 0 -; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[2:3] -; GFX9ARCH-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12 -; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX9ARCH-SDAG-NEXT: s_add_u32 flat_scratch_lo, s10, s12 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9ARCH-SDAG-NEXT: s_add_u32 s0, s0, s12 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[12:13], s[8:9], 0x0 ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9ARCH-SDAG-NEXT: s_endpgm ; ; GFX9ARCH-GISEL-LABEL: caller: ; GFX9ARCH-GISEL: ; %bb.0: -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s6 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0 -; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s2, 36 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s3, 0 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 +; GFX9ARCH-GISEL-NEXT: s_add_u32 flat_scratch_lo, s10, s12 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 +; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, s12 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, 0 +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 +; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[12:13], s[8:9], 0x0 ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9ARCH-GISEL-NEXT: s_endpgm ; -; GFX12-SDAG-LABEL: caller: -; GFX12-SDAG: ; %bb.0: -; GFX12-SDAG-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 -; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX12-SDAG-NEXT: s_mov_b32 s7, callee@abs32@hi -; GFX12-SDAG-NEXT: s_mov_b32 s6, callee@abs32@lo -; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX12-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 -; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: caller: -; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 -; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX12-GISEL-NEXT: s_mov_b32 s6, callee@abs32@lo -; GFX12-GISEL-NEXT: s_mov_b32 s7, callee@abs32@hi -; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX12-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 -; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: caller: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX12-NEXT: s_getpc_b64 s[4:5] +; GFX12-NEXT: s_sext_i32_i16 s5, s5 +; GFX12-NEXT: s_add_co_u32 s4, s4, callee@gotpcrel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s5, s5, callee@gotpcrel32@hi+16 +; GFX12-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 +; GFX12-NEXT: s_load_b64 s[6:7], s[4:5], 0x0 +; GFX12-NEXT: s_mov_b64 s[4:5], s[0:1] +; GFX12-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX12-NEXT: s_mov_b32 s32, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() call void @callee(i32 %idx) #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index 14fe4e5..8009f91 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s @@ -67,62 +67,37 @@ define amdgpu_cs void @_amdgpu_cs_main() { } define amdgpu_cs void @caller() { -; GFX9-LABEL: caller: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s10, -1 -; GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; GFX9-NEXT: s_add_u32 s8, s8, s0 -; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: s_getpc_b64 s[0:1] -; GFX9-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[8:9] -; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: s_endpgm -; ; GFX9ARCH-SDAG-LABEL: caller: ; GFX9ARCH-SDAG: ; %bb.0: -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s10, -1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s11, 0xe00000 +; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s8, s0 +; GFX9ARCH-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s8, s0 ; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s9, 0 -; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[0:1] -; GFX9ARCH-SDAG-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9] ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 -; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9ARCH-SDAG-NEXT: s_endpgm ; ; GFX9ARCH-GISEL-LABEL: caller: ; GFX9ARCH-GISEL: ; %bb.0: -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s10, -1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s11, 0xe00000 +; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s8, s0 +; GFX9ARCH-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s8, s0 ; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9] -; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 -; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9ARCH-GISEL-NEXT: s_endpgm ; |