Restore "Implement convergence control in MIR using SelectionDAG (#71785)"

Original commit 79889734b940356ab3381423c93ae06f22e772c9. Perviously reverted in commit a2afcd5721869d1d03c8146bae3885b3385ba15e. LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows: 1. Introduce target-independent ISD opcodes and MIR opcodes for convergence control intrinsics. 2. Model token values as untyped virtual registers in MIR. The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This glues the convergence control token to SDNodes that represent calls to intrinsics. The glued token is later translated to an implicit argument in the MIR. The lowering of calls to user-defined functions is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction.
author: Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com> 2024-03-04 11:31:27 +0530
committer: Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com> 2024-03-04 13:28:04 +0530
commit: c7fdd8c11e54585dc9d15d63de9742067e0506b9 (patch)
tree: a85bf57fcacb1f8aab192bef09e6a3e535434cec /llvm/test/MachineVerifier
parent: 63725ab1196ac50509ad382fc12c56f6d8b5d874 (diff)
download: llvm-c7fdd8c11e54585dc9d15d63de9742067e0506b9.zip
llvm-c7fdd8c11e54585dc9d15d63de9742067e0506b9.tar.gz
llvm-c7fdd8c11e54585dc9d15d63de9742067e0506b9.tar.bz2
5 files changed, 130 insertions, 0 deletions
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir
new file mode 100644
index 0000000..94d0dda
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir
@@ -0,0 +1,37 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            basic
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1, %bb.2;
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    ; CHECK: Entry intrinsic cannot be preceded by a convergent operation in the same basic block.
+    ; CHECK: CONVERGENCECTRL_ENTRY
+    %1:sgpr_64 = CONVERGENCECTRL_ENTRY
+    ; CHECK: Loop intrinsic cannot be preceded by a convergent operation in the same basic block.
+    ; CHECK: CONVERGENCECTRL_LOOP
+    %2:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_CBRANCH_EXECZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2;
+    ; CHECK: Entry intrinsic can occur only in the entry block.
+    ; CHECK: CONVERGENCECTRL_ENTRY
+    %5:sgpr_64 = CONVERGENCECTRL_ENTRY
+
+  bb.2:
+    ; CHECK: Convergence control tokens can only be used by convergent operations.
+    ; CHECK: G_PHI
+    %6:sgpr_64 = G_PHI %0:sgpr_64, %bb.0, %0:sgpr_64, %bb.1
+    %7:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %8:sgpr_64 = IMPLICIT_DEF
+    %4:sgpr_64 = SI_CALL %8:sgpr_64, 1, implicit %7:sgpr_64
+    ; CHECK: An operation can use at most one convergence control token.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %9:sgpr_64 = SI_CALL %8:sgpr_64, 2, implicit %7:sgpr_64, implicit %7:sgpr_64
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 3
+    %10:sgpr_64 = SI_CALL %8:sgpr_64, 3
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
new file mode 100644
index 0000000..87cf3e6
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
@@ -0,0 +1,52 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            cycles
+body:             |
+  bb.0:
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %1:sgpr_64 = IMPLICIT_DEF
+    S_CBRANCH_EXECZ %bb.9, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.8, implicit $exec
+    S_BRANCH %bb.5
+
+  bb.2:
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Irreducible cycle: entries(bb.4 bb.3)
+    %3:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_BRANCH %bb.3
+
+  bb.5:
+    S_CBRANCH_EXECZ %bb.6, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.6:
+    S_BRANCH %bb.7
+
+  bb.7:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Reducible cycle: entries(bb.6) bb.7
+    %4:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.6
+
+  bb.8:
+    ; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition.
+    %5:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    %6:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.8
+
+  bb.9:
+    ; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition.
+    %7:sgpr_64 = G_SI_CALL %1:sgpr_64, 3, implicit %0:sgpr_64
+    S_BRANCH %bb.9
+
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg
new file mode 100644
index 0000000..7c49242
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AMDGPU" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
new file mode 100644
index 0000000..c70a48b
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
@@ -0,0 +1,15 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            mixed2
+body:             |
+  bb.0:
+    %0:sgpr_64 = IMPLICIT_DEF
+    %1:sgpr_64 = SI_CALL %0, 1
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: CONVERGENCECTRL_ANCHOR
+    %2:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %3:sgpr_64 = SI_CALL %0, 2, implicit %2:sgpr_64
+
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir
new file mode 100644
index 0000000..9e869ac
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir
@@ -0,0 +1,24 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name:            region_nesting
+body:             |
+  bb.0:
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %1:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %2:sgpr_64 = IMPLICIT_DEF
+    %3:sgpr_64 = SI_CALL %2, 1, implicit %0:sgpr_64
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %4:sgpr_64 = SI_CALL %2, 2, implicit %1:sgpr_64
+    S_CBRANCH_EXECZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    %5:sgpr_64 = SI_CALL %2, 3, implicit %0:sgpr_64
+
+  bb.2:
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 4
+    %6:sgpr_64 = SI_CALL %2, 4, implicit %1:sgpr_64
+
+...
author	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>	2024-03-04 11:31:27 +0530
committer	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>	2024-03-04 13:28:04 +0530
commit	c7fdd8c11e54585dc9d15d63de9742067e0506b9 (patch)
tree	a85bf57fcacb1f8aab192bef09e6a3e535434cec /llvm/test/MachineVerifier
parent	63725ab1196ac50509ad382fc12c56f6d8b5d874 (diff)
download	llvm-c7fdd8c11e54585dc9d15d63de9742067e0506b9.zip llvm-c7fdd8c11e54585dc9d15d63de9742067e0506b9.tar.gz llvm-c7fdd8c11e54585dc9d15d63de9742067e0506b9.tar.bz2