about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jay Foad <jay.foad@amd.com> 2024-01-15 18:20:10 +0000
committer: GitHub <noreply@github.com> 2024-01-15 18:20:10 +0000
commit: ed60cb8fb98bf3cfede8c0912fe2845a4166370b (patch)
tree: 429141692e25705f248713f8169102ead54c4238
parent: 85705bbf1dada62c7ee266bb3071e9c4ab4a4bfb (diff)
download: llvm-ed60cb8fb98bf3cfede8c0912fe2845a4166370b.zip
llvm-ed60cb8fb98bf3cfede8c0912fe2845a4166370b.tar.gz
llvm-ed60cb8fb98bf3cfede8c0912fe2845a4166370b.tar.bz2
[AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12 (#78188)
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNSubtarget.h 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir 350
2 files changed, 232 insertions, 120 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ba1302e..17e5a95 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1139,7 +1139,7 @@ public:
bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
bool hasVALUPartialForwardingHazard() const {
- return getGeneration() >= GFX11;
+ return getGeneration() == GFX11;
}
bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
index 3d26990..56eb8ce 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
@@ -1,17 +1,24 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s
---
name: partial_forwarding_1_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_1_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_1_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_1_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
@@ -23,24 +30,41 @@ body: |
name: partial_forwarding_2_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_2_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr9 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_2_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_2_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 0
$sgpr1 = S_MOV_B32 0
@@ -63,19 +87,31 @@ body: |
name: partial_forwarding_3_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_3_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_3_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_3_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
@@ -186,19 +222,31 @@ body: |
name: partial_forwarding_4_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_4_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_4_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_4_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
@@ -247,19 +295,31 @@ body: |
name: partial_forwarding_5_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_5_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_5_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_5_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
$vgpr11 = V_MOV_B32_e32 0, implicit $exec
@@ -307,33 +367,59 @@ body: |
---
name: partial_forwarding_branching_1a
body: |
- ; GCN-LABEL: name: partial_forwarding_branching_1a
- ; GCN: bb.0:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.2:
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_branching_1a
+ ; GFX11: bb.0:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.1:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.2:
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_branching_1a
+ ; GFX12: bb.0:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.1:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.2:
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
@@ -358,33 +444,59 @@ body: |
---
name: partial_forwarding_branching_1b
body: |
- ; GCN-LABEL: name: partial_forwarding_branching_1b
- ; GCN: bb.0:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.2:
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_branching_1b
+ ; GFX11: bb.0:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.1:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.2:
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_branching_1b
+ ; GFX12: bb.0:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.1:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.2:
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr30 = V_MOV_B32_e32 0, implicit $exec