aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Imschweiler <robert.imschweiler@amd.com>2025-08-26 04:21:39 -0500
committerRobert Imschweiler <robert.imschweiler@amd.com>2025-08-26 04:21:39 -0500
commit25c7029d8c26c6c81fc243216d4c50648439048c (patch)
tree2e8e90f079d7ee208a78d64526b1e7de11aa8038
parent1975e4bd4ec6ae47d2f0b112eb8008329dabd29d (diff)
downloadllvm-users/ro-i/callbr-amdgpu_2.zip
llvm-users/ro-i/callbr-amdgpu_2.tar.gz
llvm-users/ro-i/callbr-amdgpu_2.tar.bz2
clarify state of implementationusers/ro-i/callbr-amdgpu_2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp7
-rw-r--r--llvm/test/Transforms/StructurizeCFG/callbr.ll200
3 files changed, 198 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 2df6bbc..706237b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -260,7 +260,7 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (DummyReturnBB == nullptr)
+ if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
if (BI->isUnconditional()) {
@@ -275,7 +275,7 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
}
Changed = true;
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
- if (DummyReturnBB == nullptr)
+ if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 847f63d..6422d69 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1396,7 +1396,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
const TargetTransformInfo *TTI) {
- // CallBr and its corresponding blocks must not be modified by this pass.
+ // CallBr and its corresponding direct target blocks are for now ignored by
+ // this pass. This is not a limitation for the currently intended use cases
+ // of callbr in the AMDGPU backend (cf.
+ // https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087).
+ // Parent and child regions are not affected by this (current) restriction.
+ // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
return false;
diff --git a/llvm/test/Transforms/StructurizeCFG/callbr.ll b/llvm/test/Transforms/StructurizeCFG/callbr.ll
index 6d8e4a7..42f9519 100644
--- a/llvm/test/Transforms/StructurizeCFG/callbr.ll
+++ b/llvm/test/Transforms/StructurizeCFG/callbr.ll
@@ -2,12 +2,192 @@
; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
; Structurize as usual, but don't tear callbr and its destination blocks apart.
+;
+; Note: currently, callbr blocks and their corresponding target blocks
+; themselves are not handled by the structurizer.* If the CFG turns out to be
+; unstructured at the end, the CFG lowering (si-annotate-control-flow) will
+; detect this. For the currently intended use cases of callbr in the context of
+; the AMDGPU backend, this is not a limitation (cf.
+; https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087).
+;
+; Note 2: while callbr and its targets remain untouched, everything else is
+; handled as usual, even if it is nested in a callbr region.
+;
+; *FIXME: this will be fixed in the future. Callbr can be handled as follows:
+; Input IR:
+; ```
+; define void @foo_callbr() {
+; callbr void asm "", "!i"() to label %fallthrough [label %indirect, ...]
+; fallthrough:
+; br label %exit
+; indirect:
+; br label %exit
+; ...
+; exit:
+; ret void
+; }
+; ```
+;
+; Output IR:
+; ```
+; define void @foo_callbr() {
+; callbr void asm "", "!i"()
+; to label %fallthrough [label %fake.indirect, label %fake.indirect1, label %fake.indirect2, ...]
+; fake.indirect: ; preds = %0
+; br label %Flow
+; fake.indirect1: ; preds = %0
+; br label %Flow
+; fake.indirect2: ; preds = %0
+; br label %Flow
+; ...
+; Flow: ; preds = %fallthrough, %fake.indirect[0-N]
+; %1 = phi i1 [ false, %fallthrough ], [ true, %fake.indirect ], [ false, %fake.indirect[1-N] ]
+; br i1 %1, label %indirect, label %Flow1
+; Flow1: ; preds = %Flow, %indirect
+; %2 = phi i1 [ false, %Flow], [ true, %fake.indirect1 ], [ false, %indirect ]
+; br i1 %2, label %indirect1, label %Flow2
+; Flow2: ; preds = %Flow1, %indirect1
+; %3 = phi i1 [ false, %Flow1 ], [ true, %fake.indirect2 ], [ false, %indirect1 ]
+; br i1 %3, label %indirect2, label %Flow3
+; ...
+; fallthrough: ; preds = %0
+; br label %Flow
+; indirect: ; preds = %Flow
+; br label %Flow1
+; indirect1: ; preds = %Flow1
+; br label %Flow2
+; indirect2: ; preds = %Flow2
+; br label %Flow3
+; ...
+; exit: ; preds = %indirectN, %FlowN
+; ret void
+; }
+; ```
+;
+; Output IR as ASCII-art:
+; %0
+; ---------------------
+; | | | |
+; v v v v
+; f f.i f.i1 f.i2
+; | | | |
+; v v v v
+; ---------------------
+; %Flow
+; | \
+; | %indirect
+; | /
+; %Flow1
+; | \
+; | %indirect1
+; | /
+; %Flow2
+; | \
+; | %indirect2
+; | /
+; %exit
+;
+
+; Only callbr, nothing to do.
+define void @callbr_simple() {
+; CHECK-LABEL: define void @callbr_simple() {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
+; CHECK: [[INDIRECT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[INDIRECT1:.*:]]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br label %exit
+indirect:
+ br label %exit
+exit:
+ ret void
+}
+
+; Callbr nested in non-callbr: non-callbr is transformed
+define void @callbr_in_non_callbr(i1 %c) {
+; CHECK-LABEL: define void @callbr_in_non_callbr(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[NOCALLBR]] ], [ true, [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[EXIT:.*]]
+; CHECK: [[CALLBR]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
+; CHECK: [[INDIRECT]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[INDIRECT1:.*:]]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[NOCALLBR]]:
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %callbr, label %nocallbr
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br label %exit
+indirect:
+ br label %exit
+nocallbr:
+ br label %exit
+exit:
+ ret void
+}
+
+; Callbr parent of non-callbr: non-callbr is transformed
+define void @non_callbr_in_callbr(i1 %c) {
+; CHECK-LABEL: define void @non_callbr_in_callbr(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
+; CHECK: [[INDIRECT]]:
+; CHECK-NEXT: br i1 [[C_INV]], label %[[FALLTHROUGH2:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FALLTHROUGH2]] ], [ true, %[[INDIRECT]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FALLTHROUGH1:.*]], label %[[FLOW1:.*]]
+; CHECK: [[FALLTHROUGH1]]:
+; CHECK-NEXT: br label %[[FLOW1]]
+; CHECK: [[FALLTHROUGH2]]:
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[INDIRECT1:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[FLOW1]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br i1 %c, label %fallthrough1, label %fallthrough2
+fallthrough1:
+ br label %exit
+fallthrough2:
+ br label %exit
+indirect:
+ br label %exit
+exit:
+ ret void
+}
-define void @callbr_inline_asm(i32 %c, i1 %d, i1 %e) {
-; CHECK-LABEL: define void @callbr_inline_asm(
-; CHECK-SAME: i32 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]]) {
-; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
-; CHECK-NEXT: br i1 [[D_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]]
+; Callbr surrounded by non-callbr: all three regular branches are handled
+; correctly
+define void @callbr_nested_in_non_callbr(i1 %c, i1 %d, i1 %e, i1 %f) {
+; CHECK-LABEL: define void @callbr_nested_in_non_callbr(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]], i1 [[F:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]]
; CHECK: [[FLOW3]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FLOW:.*]] ], [ true, [[TMP0:%.*]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[RET:.*]]
@@ -15,7 +195,7 @@ define void @callbr_inline_asm(i32 %c, i1 %d, i1 %e) {
; CHECK-NEXT: callbr void asm "", "!i"()
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
; CHECK: [[INDIRECT]]:
-; CHECK-NEXT: br i1 [[E]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]]
+; CHECK-NEXT: br i1 [[D]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]]
; CHECK: [[FALLTHROUGH1]]:
; CHECK-NEXT: br label %[[FLOW2]]
; CHECK: [[INDIRECT2:.*:]]
@@ -23,7 +203,7 @@ define void @callbr_inline_asm(i32 %c, i1 %d, i1 %e) {
; CHECK: [[INDIRECT1]]:
; CHECK-NEXT: br label %[[FLOW1]]
; CHECK: [[NOCALLBR]]:
-; CHECK-NEXT: br i1 [[E]], label %[[NOCALLBR1:.*]], label %[[FLOW]]
+; CHECK-NEXT: br i1 [[F]], label %[[NOCALLBR1:.*]], label %[[FLOW]]
; CHECK: [[NOCALLBR1]]:
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[FLOW]]:
@@ -35,11 +215,11 @@ define void @callbr_inline_asm(i32 %c, i1 %d, i1 %e) {
; CHECK: [[RET]]:
; CHECK-NEXT: ret void
;
- br i1 %d, label %callbr, label %nocallbr
+ br i1 %c, label %callbr, label %nocallbr
callbr:
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
fallthrough:
- br i1 %e, label %fallthrough1, label %ret
+ br i1 %d, label %fallthrough1, label %ret
fallthrough1:
br label %ret
indirect:
@@ -47,7 +227,7 @@ indirect:
indirect1:
br label %ret
nocallbr:
- br i1 %e, label %nocallbr1, label %ret
+ br i1 %f, label %nocallbr1, label %ret
nocallbr1:
br label %ret
ret: