diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll | 106 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/pal-metadata-3.6-dvgpr.ll | 204 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll | 13 |
9 files changed, 286 insertions, 85 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index 9e24023..ebbeab9 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND @@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: s_nop 7 -; GFX908-NEXT: v_accvgpr_read_b32 v33, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v35, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v33 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v35 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND @@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir index a42cf43..7e82382d 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir @@ -40,8 +40,8 @@ body: | ; GFX908: liveins: $agpr0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec - ; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec - ; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec + ; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec + ; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2 ; ; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll index c4479b3..e3bc516 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll @@ -15,6 +15,9 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s + ; NO-SRAM-ECC-GFX906: Flags [ ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) @@ -52,6 +55,11 @@ ; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F) ; SRAM-ECC-GFX950: ] +; SRAM-ECC-GFX1250: Flags [ +; SRAM-ECC-GFX1250: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +; SRAM-ECC-GFX1250: EF_AMDGPU_MACH_AMDGCN_GFX1250 (0x49) +; SRAM-ECC-GFX1250: ] + define amdgpu_kernel void @elf_header() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll index 51cd564..f46116e 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll @@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 { ; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31] ; GREEDY908-NEXT: s_nop 15 ; GREEDY908-NEXT: s_nop 1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32 -; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61 -; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60 -; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33 -; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59 -; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58 -; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34 -; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57 -; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56 +; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35 -; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55 -; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54 -; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36 -; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53 -; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52 -; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37 -; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51 -; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50 +; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38 -; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49 -; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48 -; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39 -; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47 -; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46 -; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40 -; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2 -; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19 +; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41 -; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18 -; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17 -; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42 -; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16 -; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15 -; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43 -; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14 -; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13 +; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44 -; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12 -; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11 -; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45 -; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10 -; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9 -; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1 -; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8 -; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46 +; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49 +; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52 +; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55 +; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58 +; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61 +; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2 ; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6 -; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5 +; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1 ; GREEDY908-NEXT: s_nop 0 ; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31] ; GREEDY908-NEXT: s_nop 15 @@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 { ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33] ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33] ; GREEDY908-NEXT: s_nop 8 +; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18 ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19 -; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18 ; GREEDY908-NEXT: s_nop 0 +; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5 ; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2 -; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3 ; GREEDY908-NEXT: s_nop 0 ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15] ; GREEDY908-NEXT: s_nop 9 diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index cf244f0..be1788c 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: s_branch .LBB0_2 ; GFX908-NEXT: .LBB0_1: ; %bb2 ; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; GFX908-NEXT: s_nop 6 +; GFX908-NEXT: v_accvgpr_read_b32 v3, a2 ; GFX908-NEXT: s_or_b32 s4, s3, 1 ; GFX908-NEXT: s_ashr_i32 s5, s3, 31 ; GFX908-NEXT: s_mov_b32 s3, s2 ; GFX908-NEXT: v_mov_b32_e32 v1, s2 -; GFX908-NEXT: s_nop 2 -; GFX908-NEXT: v_accvgpr_read_b32 v0, a2 ; GFX908-NEXT: v_mov_b32_e32 v2, s3 +; GFX908-NEXT: v_accvgpr_write_b32 a0, v3 ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 ; GFX908-NEXT: v_accvgpr_read_b32 v3, a1 -; GFX908-NEXT: v_accvgpr_write_b32 a0, v0 +; GFX908-NEXT: s_and_b32 s3, s5, s4 ; GFX908-NEXT: v_accvgpr_write_b32 a2, v4 ; GFX908-NEXT: v_accvgpr_write_b32 a3, v3 -; GFX908-NEXT: s_and_b32 s3, s5, s4 +; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3] ; GFX908-NEXT: s_cbranch_execz .LBB0_4 ; GFX908-NEXT: .LBB0_2: ; %bb diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll index 6b7d704..ede470b 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll @@ -1,13 +1,11 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck --check-prefixes=CHECK,GFX11 %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck --check-prefixes=CHECK,GFX12 %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s ; CHECK: .amdgpu_pal_metadata ; CHECK-NEXT: --- ; CHECK-NEXT: amdpal.pipelines: ; CHECK-NEXT: - .api: Vulkan ; CHECK-NEXT: .compute_registers: -; DVGPR-NEXT: .dynamic_vgpr_en: true ; CHECK-NEXT: .tg_size_en: true ; CHECK-NEXT: .tgid_x_en: false ; CHECK-NEXT: .tgid_y_en: false diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll index 5c0c366..5325499 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll @@ -1,17 +1,14 @@ -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11,NODVGPR -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK,NODVGPR -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK ; CHECK-LABEL: {{^}}_amdgpu_cs_main: -; NODVGPR: ; TotalNumSgprs: 4 -; DVGPR: ; TotalNumSgprs: 34 +; CHECK: ; TotalNumSgprs: 4 ; CHECK: ; NumVgprs: 2 ; CHECK: .amdgpu_pal_metadata ; CHECK-NEXT: --- ; CHECK-NEXT: amdpal.pipelines: ; CHECK-NEXT: - .api: Vulkan ; CHECK-NEXT: .compute_registers: -; DVGPR-NEXT: .dynamic_vgpr_en: true ; CHECK-NEXT: .tg_size_en: true ; CHECK-NEXT: .tgid_x_en: false ; CHECK-NEXT: .tgid_y_en: false @@ -57,7 +54,6 @@ ; CHECK-NEXT: .cs: ; CHECK-NEXT: .checksum_value: 0x9444d7d0 ; CHECK-NEXT: .debug_mode: false -; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70 ; CHECK-NEXT: .entry_point: _amdgpu_cs_main ; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main ; CHECK-NEXT: .excp_en: 0 @@ -69,8 +65,7 @@ ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false ; CHECK-NEXT: .scratch_memory_size: 0 -; NODVGPR-NEXT: .sgpr_count: 0x4 -; DVGPR-NEXT: .sgpr_count: 0x22 +; CHECK-NEXT: .sgpr_count: 0x4 ; CHECK-NEXT: .sgpr_limit: 0x6a ; CHECK-NEXT: .threadgroup_dimensions: ; CHECK-NEXT: - 0x1 diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6-dvgpr.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6-dvgpr.ll new file mode 100644 index 0000000..e598b0c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6-dvgpr.ll @@ -0,0 +1,204 @@ +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK + +; CHECK-LABEL: {{^}}_amdgpu_cs_main: +; CHECK: ; TotalNumSgprs: 34 +; CHECK: ; NumVgprs: 2 +; CHECK: .amdgpu_pal_metadata +; CHECK-NEXT: --- +; CHECK-NEXT: amdpal.pipelines: +; CHECK-NEXT: - .api: Vulkan +; CHECK-NEXT: .compute_registers: +; CHECK-NEXT: .dynamic_vgpr_en: true +; CHECK-NEXT: .tg_size_en: true +; CHECK-NEXT: .tgid_x_en: false +; CHECK-NEXT: .tgid_y_en: false +; CHECK-NEXT: .tgid_z_en: false +; CHECK-NEXT: .tidig_comp_cnt: 0x1 +; CHECK-NEXT: .graphics_registers: +; CHECK-NEXT: .ps_extra_lds_size: 0 +; CHECK-NEXT: .spi_ps_input_addr: +; CHECK-NEXT: .ancillary_ena: false +; CHECK-NEXT: .front_face_ena: true +; CHECK-NEXT: .line_stipple_tex_ena: false +; CHECK-NEXT: .linear_center_ena: true +; CHECK-NEXT: .linear_centroid_ena: true +; CHECK-NEXT: .linear_sample_ena: true +; CHECK-NEXT: .persp_center_ena: true +; CHECK-NEXT: .persp_centroid_ena: true +; CHECK-NEXT: .persp_pull_model_ena: false +; CHECK-NEXT: .persp_sample_ena: true +; CHECK-NEXT: .pos_fixed_pt_ena: true +; CHECK-NEXT: .pos_w_float_ena: false +; CHECK-NEXT: .pos_x_float_ena: false +; CHECK-NEXT: .pos_y_float_ena: false +; CHECK-NEXT: .pos_z_float_ena: false +; CHECK-NEXT: .sample_coverage_ena: false +; CHECK-NEXT: .spi_ps_input_ena: +; CHECK-NEXT: .ancillary_ena: false +; CHECK-NEXT: .front_face_ena: false +; CHECK-NEXT: .line_stipple_tex_ena: false +; CHECK-NEXT: .linear_center_ena: false +; CHECK-NEXT: .linear_centroid_ena: false +; CHECK-NEXT: .linear_sample_ena: false +; CHECK-NEXT: .persp_center_ena: false +; CHECK-NEXT: .persp_centroid_ena: false +; CHECK-NEXT: .persp_pull_model_ena: false +; CHECK-NEXT: .persp_sample_ena: true +; CHECK-NEXT: .pos_fixed_pt_ena: false +; CHECK-NEXT: .pos_w_float_ena: false +; CHECK-NEXT: .pos_x_float_ena: false +; CHECK-NEXT: .pos_y_float_ena: false +; CHECK-NEXT: .pos_z_float_ena: false +; CHECK-NEXT: .sample_coverage_ena: false +; CHECK-NEXT: .hardware_stages: +; CHECK-NEXT: .cs: +; CHECK-NEXT: .checksum_value: 0x9444d7d0 +; CHECK-NEXT: .debug_mode: false +; CHECK-NEXT: .dynamic_vgpr_saved_count: 0x70 +; CHECK-NOT: .entry_point: _amdgpu_cs_main +; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main +; CHECK-NEXT: .excp_en: 0 +; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: true +; GFX11-NEXT: .ieee_mode: false +; CHECK-NEXT: .image_op: false +; CHECK-NEXT: .lds_size: 0 +; CHECK-NEXT: .mem_ordered: true +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x22 +; CHECK-NEXT: .sgpr_limit: 0x6a +; CHECK-NEXT: .threadgroup_dimensions: +; CHECK-NEXT: - 0x1 +; CHECK-NEXT: - 0x400 +; CHECK-NEXT: - 0x1 +; CHECK-NEXT: .trap_present: false +; CHECK-NEXT: .user_data_reg_map: +; CHECK-NEXT: - 0x10000000 +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0 +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: .user_sgprs: 0x3 +; CHECK-NEXT: .vgpr_count: 0x2 +; CHECK-NEXT: .vgpr_limit: 0x100 +; CHECK-NEXT: .wavefront_size: 0x40 +; CHECK-NEXT: .wgp_mode: false +; CHECK-NEXT: .gs: +; CHECK-NEXT: .debug_mode: false +; CHECK-NOT: .entry_point: _amdgpu_gs_main +; CHECK-NEXT: .entry_point_symbol: gs_shader +; CHECK-NEXT: .forward_progress: true +; GFX11-NEXT: .ieee_mode: false +; CHECK-NEXT: .lds_size: 0x200 +; CHECK-NEXT: .mem_ordered: true +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x1 +; CHECK-NEXT: .vgpr_count: 0x1 +; CHECK-NEXT: .wgp_mode: true +; CHECK-NEXT: .hs: +; CHECK-NEXT: .debug_mode: false +; CHECK-NOT: .entry_point: _amdgpu_hs_main +; CHECK-NEXT: .entry_point_symbol: hs_shader +; CHECK-NEXT: .forward_progress: true +; GFX11-NEXT: .ieee_mode: false +; CHECK-NEXT: .lds_size: 0x1000 +; CHECK-NEXT: .mem_ordered: true +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x1 +; CHECK-NEXT: .vgpr_count: 0x1 +; CHECK-NEXT: .wgp_mode: true +; CHECK-NEXT: .ps: +; CHECK-NEXT: .debug_mode: false +; CHECK-NOT: .entry_point: _amdgpu_ps_main +; CHECK-NEXT: .entry_point_symbol: ps_shader +; CHECK-NEXT: .forward_progress: true +; GFX11-NEXT: .ieee_mode: false +; CHECK-NEXT: .lds_size: 0 +; CHECK-NEXT: .mem_ordered: true +; CHECK-NEXT: .scratch_en: false +; CHECK-NEXT: .scratch_memory_size: 0 +; CHECK-NEXT: .sgpr_count: 0x1 +; CHECK-NEXT: .vgpr_count: 0x1 +; CHECK-NEXT: .wgp_mode: true +; CHECK: .registers: {} +; CHECK:amdpal.version: +; CHECK-NEXT: - 0x3 +; CHECK-NEXT: - 0x6 +; CHECK-NEXT:... +; CHECK-NEXT: .end_amdgpu_pal_metadata + +define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 { +.entry: + %i = call i64 @llvm.amdgcn.s.getpc() + %i1 = and i64 %i, -4294967296 + %i2 = zext i32 %arg1 to i64 + %i3 = or i64 %i1, %i2 + %i4 = inttoptr i64 %i3 to ptr addrspace(4) + %i5 = and i32 %arg2, 1023 + %i6 = lshr i32 %arg2, 10 + %i7 = and i32 %i6, 1023 + %i8 = add nuw nsw i32 %i7, %i5 + %i9 = load <4 x i32>, ptr addrspace(4) %i4, align 16 + %.idx = shl nuw nsw i32 %i8, 2 + call void @llvm.amdgcn.raw.buffer.store.i32(i32 1, <4 x i32> %i9, i32 %.idx, i32 0, i32 0) + ret void +} + +define dllexport amdgpu_ps void @ps_shader() #1 { + ret void +} + +@LDS.GS = external addrspace(3) global [1 x i32], align 4 + +define dllexport amdgpu_gs void @gs_shader() { + %ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0 + store i32 0, ptr addrspace(3) %ptr, align 4 + ret void +} + +@LDS.HS = external addrspace(3) global [1024 x i32], align 4 + +define dllexport amdgpu_hs void @hs_shader() { + %ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0 + store i32 0, ptr addrspace(3) %ptr, align 4 + ret void +} + +!amdgpu.pal.metadata.msgpack = !{!0} + +attributes #0 = { nounwind memory(readwrite) "target-features"=",+wavefrontsize64,+cumode" "amdgpu-dynamic-vgpr-block-size"="16" } + +attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" "amdgpu-dynamic-vgpr-block-size"="16" } + +!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\06"} +!1 = !{i32 7} diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll index 830872a..d2f26e8 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll @@ -1,17 +1,14 @@ -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11,NODVGPR -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK,NODVGPR -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK ; CHECK-LABEL: {{^}}_amdgpu_cs_main: -; NODVGPR: ; TotalNumSgprs: 4 -; DVGPR: ; TotalNumSgprs: 34 +; CHECK: ; TotalNumSgprs: 4 ; CHECK: ; NumVgprs: 2 ; CHECK: .amdgpu_pal_metadata ; CHECK-NEXT: --- ; CHECK-NEXT: amdpal.pipelines: ; CHECK-NEXT: - .api: Vulkan ; CHECK-NEXT: .compute_registers: -; DVGPR-NEXT: .dynamic_vgpr_en: true ; CHECK-NEXT: .tg_size_en: true ; CHECK-NEXT: .tgid_x_en: false ; CHECK-NEXT: .tgid_y_en: false @@ -57,7 +54,6 @@ ; CHECK-NEXT: .cs: ; CHECK-NEXT: .checksum_value: 0x9444d7d0 ; CHECK-NEXT: .debug_mode: false -; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70 ; CHECK-NOT: .entry_point: _amdgpu_cs_main ; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main ; CHECK-NEXT: .excp_en: 0 @@ -69,8 +65,7 @@ ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false ; CHECK-NEXT: .scratch_memory_size: 0 -; NODVGPR-NEXT: .sgpr_count: 0x4 -; DVGPR-NEXT: .sgpr_count: 0x22 +; CHECK-NEXT: .sgpr_count: 0x4 ; CHECK-NEXT: .sgpr_limit: 0x6a ; CHECK-NEXT: .threadgroup_dimensions: ; CHECK-NEXT: - 0x1 |