diff options
4 files changed, 191 insertions, 191 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll index f0aed60..1eb8e26 100644 --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll @@ -1,28 +1,28 @@ -; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s
-
-; FUNC-LABEL: {{^}}uniform_add_SIC:
-; GCN: S_SUB_I32 killed %{{[0-9]+}}, 32
-define amdgpu_kernel void @uniform_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %a = load i32, ptr addrspace(1) %in
- %result = add i32 %a, -32
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}uniform_add_SIC:
-; SI: V_SUB_CO_U32_e64 killed %{{[0-9]+}}, 32
-; GFX900: V_SUB_U32_e64 killed %{{[0-9]+}}, 32
-define amdgpu_kernel void @divergent_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
- %a = load volatile i32, ptr addrspace(1) %gep
- %result = add i32 %a, -32
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s + +; FUNC-LABEL: {{^}}uniform_add_SIC: +; GCN: S_SUB_I32 killed %{{[0-9]+}}, 32 +define amdgpu_kernel void @uniform_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %a = load i32, ptr addrspace(1) %in + %result = add i32 %a, -32 + store i32 %result, ptr addrspace(1) %out + ret void +} + +; FUNC-LABEL: {{^}}uniform_add_SIC: +; SI: V_SUB_CO_U32_e64 killed %{{[0-9]+}}, 32 +; GFX900: V_SUB_U32_e64 killed %{{[0-9]+}}, 32 +define amdgpu_kernel void @divergent_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %a = load volatile i32, ptr addrspace(1) %gep + %result = add i32 %a, -32 + store i32 %result, ptr addrspace(1) %out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll b/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll index c8ef465..a86f25b 100644 --- a/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll @@ -1,63 +1,63 @@ -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100-ASM %s
-
-; GFX906-ASM-LABEL: fadd_test:
-; GFX906-ASM-NOT: global_atomic_add_f32
-; GFX906-ASM: v_illegal
-
-; GFX908-ASM-LABEL: fadd_test:
-; GFX908-ASM-NOT: v_illegal
-; GFX908-ASM: global_atomic_add_f32
-
-; GFX90A-ASM-LABEL: fadd_test:
-; GFX90A-ASM-NOT: v_illegal
-; GFX90A-ASM: global_atomic_add_f32
-
-; GFX940-ASM-LABEL: fadd_test:
-; GFX940-ASM-NOT: v_illegal
-; GFX940-ASM: global_atomic_add_f32
-
-; GFX1030-ASM-LABEL: fadd_test:
-; GFX1030-ASM-NOT: global_atomic_add_f32
-; GFX1030-ASM: v_illegal
-
-; GFX1100-ASM-LABEL: fadd_test:
-; GFX1100-ASM-NOT: v_illegal
-; GFX1100-ASM: global_atomic_add_f32
-
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx906 -d - | FileCheck --check-prefix=GFX906-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx908 -d - | FileCheck --check-prefix=GFX908-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx90a -d - | FileCheck --check-prefix=GFX90A-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx940 -d - | FileCheck --check-prefix=GFX940-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1030 -d - | FileCheck --check-prefix=GFX1030-OBJ %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1100 -d - | FileCheck --check-prefix=GFX1100-OBJ %s
-
-; GFX906-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-OBJ-NEXT: v_illegal // 000000000004: FFFFFFFF
-
-; GFX908-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-OBJ-NEXT: global_atomic_add_f32
-
-; GFX90A-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-OBJ-NEXT: global_atomic_add_f32
-
-; GFX940-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-OBJ-NEXT: global_atomic_add_f32
-
-; GFX1030-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-OBJ-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1030-OBJ-NEXT: v_illegal // 000000000008: 00000000
-
-; GFX1100-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-OBJ-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1100-OBJ-NEXT: global_atomic_add_f32 v[0:1], v2, off
-
-define fastcc void @fadd_test(ptr addrspace(1) nocapture noundef %0, float noundef %1) unnamed_addr {
- %3 = tail call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) noundef %0, float noundef %1)
- ret void
-}
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float)
+; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906-ASM %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908-ASM %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-ASM %s +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-ASM %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030-ASM %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100-ASM %s + +; GFX906-ASM-LABEL: fadd_test: +; GFX906-ASM-NOT: global_atomic_add_f32 +; GFX906-ASM: v_illegal + +; GFX908-ASM-LABEL: fadd_test: +; GFX908-ASM-NOT: v_illegal +; GFX908-ASM: global_atomic_add_f32 + +; GFX90A-ASM-LABEL: fadd_test: +; GFX90A-ASM-NOT: v_illegal +; GFX90A-ASM: global_atomic_add_f32 + +; GFX940-ASM-LABEL: fadd_test: +; GFX940-ASM-NOT: v_illegal +; GFX940-ASM: global_atomic_add_f32 + +; GFX1030-ASM-LABEL: fadd_test: +; GFX1030-ASM-NOT: global_atomic_add_f32 +; GFX1030-ASM: v_illegal + +; GFX1100-ASM-LABEL: fadd_test: +; GFX1100-ASM-NOT: v_illegal +; GFX1100-ASM: global_atomic_add_f32 + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx906 -d - | FileCheck --check-prefix=GFX906-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx908 -d - | FileCheck --check-prefix=GFX908-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx90a -d - | FileCheck --check-prefix=GFX90A-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx940 -d - | FileCheck --check-prefix=GFX940-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1030 -d - | FileCheck --check-prefix=GFX1030-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1100 -d - | FileCheck --check-prefix=GFX1100-OBJ %s + +; GFX906-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-OBJ-NEXT: v_illegal // 000000000004: FFFFFFFF + +; GFX908-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-OBJ-NEXT: global_atomic_add_f32 + +; GFX90A-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-OBJ-NEXT: global_atomic_add_f32 + +; GFX940-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-OBJ-NEXT: global_atomic_add_f32 + +; GFX1030-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-OBJ-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1030-OBJ-NEXT: v_illegal // 000000000008: 00000000 + +; GFX1100-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-OBJ-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-OBJ-NEXT: global_atomic_add_f32 v[0:1], v2, off + +define fastcc void @fadd_test(ptr addrspace(1) nocapture noundef %0, float noundef %1) unnamed_addr { + %3 = tail call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) noundef %0, float noundef %1) + ret void +} +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) diff --git a/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll b/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll index a72cd2b..a2bc0de 100644 --- a/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll +++ b/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll @@ -1,40 +1,40 @@ -; RUN: llc -O0 -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
-
-; GFX906-LABEL: image_sample_test:
-; GFX906-NOT: v_illegal
-; GFX906: image_sample_lz
-
-; GFX908-LABEL: image_sample_test:
-; GFX908-NOT: v_illegal
-; GFX908: image_sample_lz
-
-; GFX90A-LABEL: image_sample_test:
-; GFX90A-NOT: image_sample_lz
-; GFX90A: v_illegal
-
-; GFX940-LABEL: image_sample_test:
-; GFX940-NOT: image_sample_lz
-; GFX940: v_illegal
-
-; GFX1030-LABEL: image_sample_test:
-; GFX1030-NOT: v_illegal
-; GFX1030: image_sample_lz
-
-; GFX1100-LABEL: image_sample_test:
-; GFX1100-NOT: v_illegal
-; GFX1100: image_sample_lz
-
-define amdgpu_kernel void @image_sample_test(ptr addrspace(1) %out, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) {
-
- %result = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0)
-
- store <4 x float> %result, ptr addrspace(1) %out
- ret void
-}
-
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s +; RUN: llc -O0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s +; RUN: llc -O0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s +; RUN: llc -O0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -O0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s +; RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s + +; GFX906-LABEL: image_sample_test: +; GFX906-NOT: v_illegal +; GFX906: image_sample_lz + +; GFX908-LABEL: image_sample_test: +; GFX908-NOT: v_illegal +; GFX908: image_sample_lz + +; GFX90A-LABEL: image_sample_test: +; GFX90A-NOT: image_sample_lz +; GFX90A: v_illegal + +; GFX940-LABEL: image_sample_test: +; GFX940-NOT: image_sample_lz +; GFX940: v_illegal + +; GFX1030-LABEL: image_sample_test: +; GFX1030-NOT: v_illegal +; GFX1030: image_sample_lz + +; GFX1100-LABEL: image_sample_test: +; GFX1100-NOT: v_illegal +; GFX1100: image_sample_lz + +define amdgpu_kernel void @image_sample_test(ptr addrspace(1) %out, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) { + + %result = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + + store <4 x float> %result, ptr addrspace(1) %out + ret void +} + +declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-wrong-kill-flags.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-wrong-kill-flags.mir index 542f1c2..ba966a2 100644 --- a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-wrong-kill-flags.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-wrong-kill-flags.mir @@ -1,60 +1,60 @@ -# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX1030 %s
-
----
-
-# GFX1030-LABEL: name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src0
-# GFX1030: V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
-# GFX1030: V_CMPX_EQ_U32_nosdst_e64 $vgpr0, 0, implicit-def $exec, implicit $exec
-name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src0
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F
-
- renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec
- renamable $vcc_lo = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
- renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec
- renamable $sgpr48 = S_MOV_B32 0
- renamable $sgpr68 = COPY renamable $sgpr66
- renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo
- renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc
- $exec_lo = S_MOV_B32_term killed renamable $sgpr6
-...
-
-# GFX1030-LABEL: name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src1
-# GFX1030: V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
-# GFX1030: V_CMPX_EQ_U32_nosdst_e64 0, $vgpr0, implicit-def $exec, implicit $exec
-name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src1
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F
-
- renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec
- renamable $vcc_lo = V_CMP_EQ_U32_e64 0, $vgpr0, implicit $exec
- renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec
- renamable $sgpr48 = S_MOV_B32 0
- renamable $sgpr68 = COPY renamable $sgpr66
- renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo
- renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc
- $exec_lo = S_MOV_B32_term killed renamable $sgpr6
-...
-
-# GFX1030-LABEL: name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_non_reg
-# GFX1030: V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec
-# GFX1030: V_CMPX_EQ_U32_nosdst_e64 0, 8, implicit-def $exec, implicit $exec
-name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_non_reg
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F
-
- renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec
- renamable $vcc_lo = V_CMP_EQ_U32_e64 0, 8, implicit $exec
- renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec
- renamable $sgpr48 = S_MOV_B32 0
- renamable $sgpr68 = COPY renamable $sgpr66
- renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo
- renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc
- $exec_lo = S_MOV_B32_term killed renamable $sgpr6
-...
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX1030 %s + +--- + +# GFX1030-LABEL: name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src0 +# GFX1030: V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec +# GFX1030: V_CMPX_EQ_U32_nosdst_e64 $vgpr0, 0, implicit-def $exec, implicit $exec +name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F + + renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec + renamable $vcc_lo = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec + renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec + renamable $sgpr48 = S_MOV_B32 0 + renamable $sgpr68 = COPY renamable $sgpr66 + renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo + renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc + $exec_lo = S_MOV_B32_term killed renamable $sgpr6 +... + +# GFX1030-LABEL: name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src1 +# GFX1030: V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec +# GFX1030: V_CMPX_EQ_U32_nosdst_e64 0, $vgpr0, implicit-def $exec, implicit $exec +name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_src1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F + + renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec + renamable $vcc_lo = V_CMP_EQ_U32_e64 0, $vgpr0, implicit $exec + renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec + renamable $sgpr48 = S_MOV_B32 0 + renamable $sgpr68 = COPY renamable $sgpr66 + renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo + renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc + $exec_lo = S_MOV_B32_term killed renamable $sgpr6 +... + +# GFX1030-LABEL: name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_non_reg +# GFX1030: V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec +# GFX1030: V_CMPX_EQ_U32_nosdst_e64 0, 8, implicit-def $exec, implicit $exec +name: vcmp_saveexec_to_vcmpx_wrong_kill_flags_non_reg +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F + + renamable $vgpr0 = V_AND_B32_e32 128, $vgpr90, implicit $exec + renamable $vcc_lo = V_CMP_EQ_U32_e64 0, 8, implicit $exec + renamable $sgpr4 = V_CMP_NE_U32_e64 0, killed $vgpr0, implicit $exec + renamable $sgpr48 = S_MOV_B32 0 + renamable $sgpr68 = COPY renamable $sgpr66 + renamable $sgpr5 = COPY $exec_lo, implicit-def $exec_lo + renamable $sgpr6 = S_AND_B32 renamable $sgpr5, killed renamable $vcc_lo, implicit-def dead $scc + $exec_lo = S_MOV_B32_term killed renamable $sgpr6 +... |
