aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHarrison Hao <57025411+harrisonGPU@users.noreply.github.com>2025-06-05 22:07:06 +0800
committerGitHub <noreply@github.com>2025-06-05 22:07:06 +0800
commitb2379bd5d59993c0d859ad90f9f5cdfcfce26e71 (patch)
treecb64510d58800e13adc80b3719d14c9d273679b0
parent2d7f53bc27c345b844013b0a64040119bd362e51 (diff)
downloadllvm-b2379bd5d59993c0d859ad90f9f5cdfcfce26e71.zip
llvm-b2379bd5d59993c0d859ad90f9f5cdfcfce26e71.tar.gz
llvm-b2379bd5d59993c0d859ad90f9f5cdfcfce26e71.tar.bz2
[AMDGPU] Support bottom-up postRA scheduing. (#135295)
Solely relying on top‑down scheduling can underutilize hardware, since long‑latency instructions often end up scheduled too late and their latency isn’t well hidden. Adding bottom‑up post‑RA scheduling lets us move those instructions earlier, which improves latency hiding and yields roughly a 2% performance gain on key benchmarks.
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp3
-rw-r--r--llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir161
2 files changed, 163 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 1561efe..bc95d3f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -417,7 +417,8 @@ void GCNHazardRecognizer::AdvanceCycle() {
}
void GCNHazardRecognizer::RecedeCycle() {
- llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
+ assert(!IsHazardRecognizerMode &&
+ "Bottom-up scheduling shouldn't run in hazard recognizer mode");
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
new file mode 100644
index 0000000..a2a0794a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
@@ -0,0 +1,161 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=TOPDOWN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=BOTTOMUP %s
+
+# This test demonstrates how bottom-up scheduling moves IMAGE_SAMPLE instructions
+# earlier compared to top-down scheduling.
+
+---
+name: sched_image_sample_post_ra
+tracksRegLiveness: true
+tracksDebugUserValues: true
+body: |
+ ; TOPDOWN-LABEL: name: sched_image_sample_post_ra
+ ; TOPDOWN: bb.0.entry:
+ ; TOPDOWN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; TOPDOWN-NEXT: liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+ ; TOPDOWN-NEXT: {{ $}}
+ ; TOPDOWN-NEXT: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; TOPDOWN-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; TOPDOWN-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr5
+ ; TOPDOWN-NEXT: $m0 = S_MOV_B32 killed $sgpr6
+ ; TOPDOWN-NEXT: $sgpr16_sgpr17 = S_MOV_B64 $exec
+ ; TOPDOWN-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; TOPDOWN-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; TOPDOWN-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+ ; TOPDOWN-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr1
+ ; TOPDOWN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+ ; TOPDOWN-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr3
+ ; TOPDOWN-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr2
+ ; TOPDOWN-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr1
+ ; TOPDOWN-NEXT: $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+ ; TOPDOWN-NEXT: BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 {
+ ; TOPDOWN-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+ ; TOPDOWN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+ ; TOPDOWN-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+ ; TOPDOWN-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+ ; TOPDOWN-NEXT: }
+ ; TOPDOWN-NEXT: renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TOPDOWN-NEXT: renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, killed $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TOPDOWN-NEXT: renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; TOPDOWN-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; TOPDOWN-NEXT: renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; TOPDOWN-NEXT: renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TOPDOWN-NEXT: renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, killed $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TOPDOWN-NEXT: BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+ ; TOPDOWN-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ ; TOPDOWN-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+ ; TOPDOWN-NEXT: }
+ ; TOPDOWN-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; TOPDOWN-NEXT: renamable $sgpr0_sgpr1 = COPY $vcc
+ ; TOPDOWN-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; TOPDOWN-NEXT: renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+ ; TOPDOWN-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; TOPDOWN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; TOPDOWN-NEXT: {{ $}}
+ ; TOPDOWN-NEXT: bb.1:
+ ; TOPDOWN-NEXT: successors: %bb.2(0x80000000)
+ ; TOPDOWN-NEXT: {{ $}}
+ ; TOPDOWN-NEXT: S_BRANCH %bb.2
+ ; TOPDOWN-NEXT: {{ $}}
+ ; TOPDOWN-NEXT: bb.2:
+ ; TOPDOWN-NEXT: S_ENDPGM 0
+ ;
+ ; BOTTOMUP-LABEL: name: sched_image_sample_post_ra
+ ; BOTTOMUP: bb.0.entry:
+ ; BOTTOMUP-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; BOTTOMUP-NEXT: liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+ ; BOTTOMUP-NEXT: {{ $}}
+ ; BOTTOMUP-NEXT: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; BOTTOMUP-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; BOTTOMUP-NEXT: $sgpr16_sgpr17 = S_MOV_B64 $exec
+ ; BOTTOMUP-NEXT: $m0 = S_MOV_B32 killed $sgpr6
+ ; BOTTOMUP-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr5
+ ; BOTTOMUP-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; BOTTOMUP-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; BOTTOMUP-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+ ; BOTTOMUP-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr1
+ ; BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+ ; BOTTOMUP-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr2
+ ; BOTTOMUP-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr3
+ ; BOTTOMUP-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr1
+ ; BOTTOMUP-NEXT: $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+ ; BOTTOMUP-NEXT: BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 {
+ ; BOTTOMUP-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+ ; BOTTOMUP-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+ ; BOTTOMUP-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+ ; BOTTOMUP-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+ ; BOTTOMUP-NEXT: }
+ ; BOTTOMUP-NEXT: renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BOTTOMUP-NEXT: renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, killed $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BOTTOMUP-NEXT: renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BOTTOMUP-NEXT: renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, killed $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+ ; BOTTOMUP-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ ; BOTTOMUP-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+ ; BOTTOMUP-NEXT: }
+ ; BOTTOMUP-NEXT: renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; BOTTOMUP-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = COPY $vcc
+ ; BOTTOMUP-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; BOTTOMUP-NEXT: renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+ ; BOTTOMUP-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; BOTTOMUP-NEXT: renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; BOTTOMUP-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; BOTTOMUP-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; BOTTOMUP-NEXT: {{ $}}
+ ; BOTTOMUP-NEXT: bb.1:
+ ; BOTTOMUP-NEXT: successors: %bb.2(0x80000000)
+ ; BOTTOMUP-NEXT: {{ $}}
+ ; BOTTOMUP-NEXT: S_BRANCH %bb.2
+ ; BOTTOMUP-NEXT: {{ $}}
+ ; BOTTOMUP-NEXT: bb.2:
+ ; BOTTOMUP-NEXT: S_ENDPGM 0
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.34(0x40000000); %bb.1(50.00%), %bb.34(50.00%)
+ liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+ $sgpr12_sgpr13 = S_MOV_B64 $exec
+ $exec = S_WQM_B64 $exec, implicit-def $scc
+ $sgpr14 = S_MOV_B32 $sgpr5
+ $m0 = S_MOV_B32 $sgpr6
+ $sgpr16_sgpr17 = S_MOV_B64 $exec
+ $exec = S_WQM_B64 $exec, implicit-def $scc
+ $sgpr10 = S_MOV_B32 $sgpr3
+ $sgpr9 = S_MOV_B32 $sgpr2
+ $sgpr8 = S_MOV_B32 $sgpr1
+ $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+ renamable $sgpr0_sgpr1 = S_GETPC_B64
+ $sgpr15 = S_MOV_B32 killed $sgpr1
+ $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+ BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit $sgpr14_sgpr15 {
+ renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+ renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+ renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+ renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+ }
+ renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ BUNDLE implicit-def $vgpr11, implicit $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+ renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+ }
+ nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr11, implicit-def $vcc, implicit $mode, implicit $exec
+ renamable $sgpr0_sgpr1 = COPY $vcc
+ nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr8, implicit-def $vcc, implicit $mode, implicit $exec
+ renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+ $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ S_CBRANCH_EXECZ %bb.34, implicit $exec
+
+ bb.1:
+ successors: %bb.34(0x80000000)
+ S_BRANCH %bb.34
+
+ bb.34:
+ S_ENDPGM 0
+...