diff options
author | Harrison Hao <57025411+harrisonGPU@users.noreply.github.com> | 2025-06-05 22:07:06 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-05 22:07:06 +0800 |
commit | b2379bd5d59993c0d859ad90f9f5cdfcfce26e71 (patch) | |
tree | cb64510d58800e13adc80b3719d14c9d273679b0 | |
parent | 2d7f53bc27c345b844013b0a64040119bd362e51 (diff) | |
download | llvm-b2379bd5d59993c0d859ad90f9f5cdfcfce26e71.zip llvm-b2379bd5d59993c0d859ad90f9f5cdfcfce26e71.tar.gz llvm-b2379bd5d59993c0d859ad90f9f5cdfcfce26e71.tar.bz2 |
[AMDGPU] Support bottom-up postRA scheduling. (#135295)
Solely relying on top‑down scheduling can underutilize hardware, since
long‑latency instructions often end up scheduled too late and their
latency isn’t well hidden. Adding bottom‑up post‑RA scheduling lets us
move those instructions earlier, which improves latency hiding and
yields roughly a 2% performance gain on key benchmarks.
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir | 161 |
2 files changed, 163 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1561efe..bc95d3f 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -417,7 +417,8 @@ void GCNHazardRecognizer::AdvanceCycle() { } void GCNHazardRecognizer::RecedeCycle() { - llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); + assert(!IsHazardRecognizerMode && + "Bottom-up scheduling shouldn't run in hazard recognizer mode"); } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir new file mode 100644 index 0000000..a2a0794a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir @@ -0,0 +1,161 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=TOPDOWN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=BOTTOMUP %s + +# This test demonstrates how bottom-up scheduling moves IMAGE_SAMPLE instructions +# earlier compared to top-down scheduling. 
+ +--- +name: sched_image_sample_post_ra +tracksRegLiveness: true +tracksDebugUserValues: true +body: | + ; TOPDOWN-LABEL: name: sched_image_sample_post_ra + ; TOPDOWN: bb.0.entry: + ; TOPDOWN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; TOPDOWN-NEXT: liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; TOPDOWN-NEXT: {{ $}} + ; TOPDOWN-NEXT: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; TOPDOWN-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc + ; TOPDOWN-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr5 + ; TOPDOWN-NEXT: $m0 = S_MOV_B32 killed $sgpr6 + ; TOPDOWN-NEXT: $sgpr16_sgpr17 = S_MOV_B64 $exec + ; TOPDOWN-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc + ; TOPDOWN-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec + ; TOPDOWN-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec + ; TOPDOWN-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr1 + ; TOPDOWN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64 + ; TOPDOWN-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr3 + ; TOPDOWN-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr2 + ; TOPDOWN-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr1 + ; TOPDOWN-NEXT: $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17 + ; TOPDOWN-NEXT: BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, 
implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, 
implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 { + ; TOPDOWN-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256)) + ; TOPDOWN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128)) + ; TOPDOWN-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256)) + ; TOPDOWN-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128)) + ; TOPDOWN-NEXT: } + ; TOPDOWN-NEXT: renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; TOPDOWN-NEXT: renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, killed $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; TOPDOWN-NEXT: renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; TOPDOWN-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; TOPDOWN-NEXT: renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; TOPDOWN-NEXT: renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; TOPDOWN-NEXT: renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, killed $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; TOPDOWN-NEXT: BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed 
$sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 { + ; TOPDOWN-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; TOPDOWN-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) + ; TOPDOWN-NEXT: } + ; TOPDOWN-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; TOPDOWN-NEXT: renamable $sgpr0_sgpr1 = COPY $vcc + ; TOPDOWN-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; TOPDOWN-NEXT: renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc + ; TOPDOWN-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec + ; TOPDOWN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; TOPDOWN-NEXT: {{ $}} + ; TOPDOWN-NEXT: bb.1: + ; TOPDOWN-NEXT: successors: %bb.2(0x80000000) + ; TOPDOWN-NEXT: {{ $}} + ; TOPDOWN-NEXT: S_BRANCH %bb.2 + ; TOPDOWN-NEXT: {{ $}} + ; TOPDOWN-NEXT: bb.2: + ; TOPDOWN-NEXT: S_ENDPGM 0 + ; + ; BOTTOMUP-LABEL: name: sched_image_sample_post_ra + ; BOTTOMUP: bb.0.entry: + ; BOTTOMUP-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; BOTTOMUP-NEXT: liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; BOTTOMUP-NEXT: {{ $}} + ; BOTTOMUP-NEXT: $sgpr12_sgpr13 = S_MOV_B64 $exec + ; BOTTOMUP-NEXT: $exec = S_WQM_B64 $exec, 
implicit-def $scc + ; BOTTOMUP-NEXT: $sgpr16_sgpr17 = S_MOV_B64 $exec + ; BOTTOMUP-NEXT: $m0 = S_MOV_B32 killed $sgpr6 + ; BOTTOMUP-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr5 + ; BOTTOMUP-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc + ; BOTTOMUP-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec + ; BOTTOMUP-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec + ; BOTTOMUP-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr1 + ; BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64 + ; BOTTOMUP-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr2 + ; BOTTOMUP-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr3 + ; BOTTOMUP-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr1 + ; BOTTOMUP-NEXT: $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17 + ; BOTTOMUP-NEXT: BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, 
implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 { + ; BOTTOMUP-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = 
S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256)) + ; BOTTOMUP-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128)) + ; BOTTOMUP-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256)) + ; BOTTOMUP-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128)) + ; BOTTOMUP-NEXT: } + ; BOTTOMUP-NEXT: renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; BOTTOMUP-NEXT: renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, killed $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; BOTTOMUP-NEXT: renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; BOTTOMUP-NEXT: renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, killed $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode + ; BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 { + ; BOTTOMUP-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; BOTTOMUP-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable 
$sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) + ; BOTTOMUP-NEXT: } + ; BOTTOMUP-NEXT: renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; BOTTOMUP-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = COPY $vcc + ; BOTTOMUP-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; BOTTOMUP-NEXT: renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc + ; BOTTOMUP-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; BOTTOMUP-NEXT: renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; BOTTOMUP-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec + ; BOTTOMUP-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; BOTTOMUP-NEXT: {{ $}} + ; BOTTOMUP-NEXT: bb.1: + ; BOTTOMUP-NEXT: successors: %bb.2(0x80000000) + ; BOTTOMUP-NEXT: {{ $}} + ; BOTTOMUP-NEXT: S_BRANCH %bb.2 + ; BOTTOMUP-NEXT: {{ $}} + ; BOTTOMUP-NEXT: bb.2: + ; BOTTOMUP-NEXT: S_ENDPGM 0 + bb.0.entry: + successors: %bb.1(0x40000000), %bb.34(0x40000000); %bb.1(50.00%), %bb.34(50.00%) + liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + $sgpr12_sgpr13 = S_MOV_B64 $exec + $exec = S_WQM_B64 $exec, implicit-def $scc + $sgpr14 = S_MOV_B32 $sgpr5 + $m0 = S_MOV_B32 $sgpr6 + $sgpr16_sgpr17 = S_MOV_B64 $exec + $exec = S_WQM_B64 $exec, implicit-def $scc + $sgpr10 = S_MOV_B32 $sgpr3 + $sgpr9 = S_MOV_B32 $sgpr2 + $sgpr8 = S_MOV_B32 $sgpr1 + $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec + $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec + renamable $sgpr0_sgpr1 = S_GETPC_B64 + $sgpr15 = S_MOV_B32 killed $sgpr1 + $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17 + BUNDLE implicit-def 
$sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def 
$sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit $sgpr14_sgpr15 { + renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256)) + renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128)) + renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256)) + renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128)) + } + renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode + renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode + renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, $vgpr2, 0, $vgpr1, 0, killed 
$vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode + renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode + BUNDLE implicit-def $vgpr11, implicit $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 { + renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) + } + nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr11, implicit-def $vcc, implicit $mode, implicit $exec + renamable $sgpr0_sgpr1 = COPY $vcc + nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr8, implicit-def $vcc, implicit $mode, implicit $exec + renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec + renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc + $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec + S_CBRANCH_EXECZ %bb.34, implicit $exec + + bb.1: + successors: %bb.34(0x80000000) + S_BRANCH %bb.34 + + bb.34: + S_ENDPGM 0 +... |