Implement convergence control in MIR using SelectionDAG (#71785)

LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows:

1. Introduce target-independent ISD opcodes and MIR opcodes for convergence control intrinsics.
2. Model token values as untyped virtual registers in MIR.

The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This glues the convergence control token to SDNodes that represent calls to intrinsics. The glued token is later translated to an implicit argument in the MIR.

The lowering of calls to user-defined functions is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction.
author: Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com> 2024-02-21 10:06:37 +0530
committer: GitHub <noreply@github.com> 2024-02-21 10:06:37 +0530
commit: 79889734b940356ab3381423c93ae06f22e772c9 (patch)
tree: d0265e450f2cb9a389e62d0e6a3a70aa339a7084 /llvm/test/MachineVerifier
parent: 03203b79c6247465850ee6e9f3e2399afc35720b (diff)
download: llvm-79889734b940356ab3381423c93ae06f22e772c9.zip
llvm-79889734b940356ab3381423c93ae06f22e772c9.tar.gz
llvm-79889734b940356ab3381423c93ae06f22e772c9.tar.bz2
6 files changed, 141 insertions, 0 deletions
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir
new file mode 100644
index 0000000..94d0dda
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir
@@ -0,0 +1,37 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            basic
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2;
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    ; CHECK: Entry intrinsic cannot be preceded by a convergent operation in the same basic block.
+    ; CHECK: CONVERGENCECTRL_ENTRY
+    %1:sgpr_64 = CONVERGENCECTRL_ENTRY
+    ; CHECK: Loop intrinsic cannot be preceded by a convergent operation in the same basic block.
+    ; CHECK: CONVERGENCECTRL_LOOP
+    %2:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_CBRANCH_EXECZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2;
+    ; CHECK: Entry intrinsic can occur only in the entry block.
+    ; CHECK: CONVERGENCECTRL_ENTRY
+    %5:sgpr_64 = CONVERGENCECTRL_ENTRY
+
+  bb.2:
+    ; CHECK: Convergence control tokens can only be used by convergent operations.
+    ; CHECK: G_PHI
+    %6:sgpr_64 = G_PHI %0:sgpr_64, %bb.0, %0:sgpr_64, %bb.1
+    %7:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %8:sgpr_64 = IMPLICIT_DEF
+    %4:sgpr_64 = SI_CALL %8:sgpr_64, 1, implicit %7:sgpr_64
+    ; CHECK: An operation can use at most one convergence control token.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %9:sgpr_64 = SI_CALL %8:sgpr_64, 2, implicit %7:sgpr_64, implicit %7:sgpr_64
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 3
+    %10:sgpr_64 = SI_CALL %8:sgpr_64, 3
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
new file mode 100644
index 0000000..87cf3e6
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
@@ -0,0 +1,52 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            cycles
+body:             |
+  bb.0:
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %1:sgpr_64 = IMPLICIT_DEF
+    S_CBRANCH_EXECZ %bb.9, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.8, implicit $exec
+    S_BRANCH %bb.5
+
+  bb.2:
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Irreducible cycle: entries(bb.4 bb.3)
+    %3:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_BRANCH %bb.3
+
+  bb.5:
+    S_CBRANCH_EXECZ %bb.6, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.6:
+    S_BRANCH %bb.7
+
+  bb.7:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Reducible cycle: entries(bb.6) bb.7
+    %4:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.6
+
+  bb.8:
+    ; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition.
+    %5:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    %6:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.8
+
+  bb.9:
+    ; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition.
+    %7:sgpr_64 = G_SI_CALL %1:sgpr_64, 3, implicit %0:sgpr_64
+    S_BRANCH %bb.9
+
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg
new file mode 100644
index 0000000..7c49242
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AMDGPU" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
new file mode 100644
index 0000000..c70a48b
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
@@ -0,0 +1,15 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            mixed2
+body:             |
+  bb.0:
+    %0:sgpr_64 = IMPLICIT_DEF
+    %1:sgpr_64 = SI_CALL %0, 1
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: CONVERGENCECTRL_ANCHOR
+    %2:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %3:sgpr_64 = SI_CALL %0, 2, implicit %2:sgpr_64
+
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir
new file mode 100644
index 0000000..b3834f4
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir
@@ -0,0 +1,11 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            not_ssa
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK: Convergence control requires SSA.
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %8:sgpr_64 = IMPLICIT_DEF
+    %8:sgpr_64 = IMPLICIT_DEF
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir
new file mode 100644
index 0000000..9e869ac
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir
@@ -0,0 +1,24 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            region_nesting
+body:             |
+  bb.0:
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %1:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %2:sgpr_64 = IMPLICIT_DEF
+    %3:sgpr_64 = SI_CALL %2, 1, implicit %0:sgpr_64
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %4:sgpr_64 = SI_CALL %2, 2, implicit %1:sgpr_64
+    S_CBRANCH_EXECZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    %5:sgpr_64 = SI_CALL %2, 3, implicit %0:sgpr_64
+
+  bb.2:
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 4
+    %6:sgpr_64 = SI_CALL %2, 4, implicit %1:sgpr_64
+
+...
author	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>	2024-02-21 10:06:37 +0530
committer	GitHub <noreply@github.com>	2024-02-21 10:06:37 +0530
commit	79889734b940356ab3381423c93ae06f22e772c9 (patch)
tree	d0265e450f2cb9a389e62d0e6a3a70aa339a7084 /llvm/test/MachineVerifier
parent	03203b79c6247465850ee6e9f3e2399afc35720b (diff)
download	llvm-79889734b940356ab3381423c93ae06f22e772c9.zip llvm-79889734b940356ab3381423c93ae06f22e772c9.tar.gz llvm-79889734b940356ab3381423c93ae06f22e772c9.tar.bz2