diff options
author | Jay Foad <jay.foad@amd.com> | 2024-01-15 18:20:10 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-15 18:20:10 +0000 |
commit | ed60cb8fb98bf3cfede8c0912fe2845a4166370b (patch) | |
tree | 429141692e25705f248713f8169102ead54c4238 | |
parent | 85705bbf1dada62c7ee266bb3071e9c4ab4a4bfb (diff) | |
download | llvm-ed60cb8fb98bf3cfede8c0912fe2845a4166370b.zip llvm-ed60cb8fb98bf3cfede8c0912fe2845a4166370b.tar.gz llvm-ed60cb8fb98bf3cfede8c0912fe2845a4166370b.tar.bz2 |
[AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12 (#78188)
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir | 350 |
2 files changed, 232 insertions, 120 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index ba1302e..17e5a95 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1139,7 +1139,7 @@ public: bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; } bool hasVALUPartialForwardingHazard() const { - return getGeneration() >= GFX11; + return getGeneration() == GFX11; } bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; } diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir index 3d26990..56eb8ce 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir @@ -1,17 +1,24 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s +# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s --- name: partial_forwarding_1_hazard body: | bb.0: - ; GCN-LABEL: name: partial_forwarding_1_hazard - ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_1_hazard + ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_1_hazard + ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $exec = S_MOV_B64 -1 $vgpr1 = V_MOV_B32_e32 0, implicit $exec @@ -23,24 +30,41 @@ body: | name: partial_forwarding_2_hazard body: | bb.0: - ; GCN-LABEL: name: partial_forwarding_2_hazard - ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr2 = S_MOV_B32 0 - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: $sgpr3 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr9 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr10 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_2_hazard + ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0 + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr4 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr5 = S_MOV_B32 0 + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $sgpr6 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr7 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr8 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr9 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr10 = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_2_hazard + ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0 + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr4 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr5 = S_MOV_B32 0 + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $sgpr6 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr7 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr8 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr9 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr10 = S_MOV_B32 0 + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $sgpr0 = S_MOV_B32 0 $sgpr1 = S_MOV_B32 0 @@ -63,19 +87,31 @@ body: | name: partial_forwarding_3_hazard body: | bb.0: - ; GCN-LABEL: name: partial_forwarding_3_hazard - ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_3_hazard + ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_3_hazard + ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr10 = V_MOV_B32_e32 0, implicit $exec $exec = S_MOV_B64 -1 @@ -186,19 +222,31 @@ body: | name: partial_forwarding_4_hazard body: | bb.0: - ; GCN-LABEL: name: partial_forwarding_4_hazard - ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_4_hazard + ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_4_hazard + ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $exec = S_MOV_B64 -1 $vgpr10 = V_MOV_B32_e32 0, implicit $exec @@ -247,19 +295,31 @@ body: | name: partial_forwarding_5_hazard body: | bb.0: - ; GCN-LABEL: name: partial_forwarding_5_hazard - ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_5_hazard + ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_5_hazard + ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr10 = V_MOV_B32_e32 0, implicit $exec $vgpr11 = V_MOV_B32_e32 0, implicit $exec @@ -307,33 +367,59 @@ body: | --- name: partial_forwarding_branching_1a body: | - ; GCN-LABEL: name: partial_forwarding_branching_1a - ; GCN: bb.0: - ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: S_BRANCH %bb.2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_BRANCH %bb.2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.2: - ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_branching_1a + ; GFX11: bb.0: + ; GFX11-NEXT: successors: %bb.2(0x80000000) + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: S_BRANCH %bb.2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: bb.1: + ; GFX11-NEXT: successors: %bb.2(0x80000000) + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_BRANCH %bb.2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: bb.2: + ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_branching_1a + ; GFX12: bb.0: + ; GFX12-NEXT: successors: %bb.2(0x80000000) + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: S_BRANCH %bb.2 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: bb.1: + ; GFX12-NEXT: successors: %bb.2(0x80000000) + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: S_BRANCH %bb.2 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: bb.2: + ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 bb.0: $vgpr0 = V_MOV_B32_e32 0, implicit $exec $exec = S_MOV_B64 -1 @@ -358,33 +444,59 @@ body: | --- name: partial_forwarding_branching_1b body: | - ; GCN-LABEL: name: partial_forwarding_branching_1b - ; GCN: bb.0: - ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_BRANCH %bb.2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $exec = S_MOV_B64 -1 - ; GCN-NEXT: S_BRANCH %bb.2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.2: - ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 - ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: partial_forwarding_branching_1b + ; GFX11: bb.0: + ; GFX11-NEXT: successors: %bb.2(0x80000000) + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_BRANCH %bb.2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: bb.1: + ; GFX11-NEXT: successors: %bb.2(0x80000000) + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $exec = S_MOV_B64 -1 + ; GFX11-NEXT: S_BRANCH %bb.2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: bb.2: + ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0 + ; GFX12-LABEL: name: partial_forwarding_branching_1b + ; GFX12: bb.0: + ; GFX12-NEXT: successors: %bb.2(0x80000000) + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: S_BRANCH %bb.2 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: bb.1: + ; GFX12-NEXT: successors: %bb.2(0x80000000) + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $exec = S_MOV_B64 -1 + ; GFX12-NEXT: S_BRANCH %bb.2 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: bb.2: + ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 bb.0: $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr30 = V_MOV_B32_e32 0, implicit $exec |