diff options
author | Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com> | 2024-02-21 10:06:37 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-21 10:06:37 +0530 |
commit | 79889734b940356ab3381423c93ae06f22e772c9 (patch) | |
tree | d0265e450f2cb9a389e62d0e6a3a70aa339a7084 /llvm/test/MachineVerifier | |
parent | 03203b79c6247465850ee6e9f3e2399afc35720b (diff) | |
download | llvm-79889734b940356ab3381423c93ae06f22e772c9.zip llvm-79889734b940356ab3381423c93ae06f22e772c9.tar.gz llvm-79889734b940356ab3381423c93ae06f22e772c9.tar.bz2 |
Implement convergence control in MIR using SelectionDAG (#71785)
LLVM function calls carry convergence control tokens as operand bundles, where
the tokens themselves are produced by convergence control intrinsics. This patch
implements convergence control tokens in MIR as follows:
1. Introduce target-independent ISD opcodes and MIR opcodes for convergence
control intrinsics.
2. Model token values as untyped virtual registers in MIR.
The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a
corresponding machine opcode with the same spelling. This opcode glues the
convergence control token to SDNodes that represent calls to intrinsics. The
glued token is later translated to an implicit argument in the MIR.
The lowering of calls to user-defined functions is target-specific. On AMDGPU,
the convergence control operand bundle at a non-intrinsic call is translated to
an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment
converts this explicit argument to an implicit argument on the SI_CALL
instruction.
Diffstat (limited to 'llvm/test/MachineVerifier')
6 files changed, 141 insertions, 0 deletions
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir new file mode 100644 index 0000000..94d0dda --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir @@ -0,0 +1,37 @@ +# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +--- +name: basic +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2; + %0:sgpr_64 = CONVERGENCECTRL_ANCHOR + ; CHECK: Entry intrinsic cannot be preceded by a convergent operation in the same basic block. + ; CHECK: CONVERGENCECTRL_ENTRY + %1:sgpr_64 = CONVERGENCECTRL_ENTRY + ; CHECK: Loop intrinsic cannot be preceded by a convergent operation in the same basic block. + ; CHECK: CONVERGENCECTRL_LOOP + %2:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 + S_CBRANCH_EXECZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2; + ; CHECK: Entry intrinsic can occur only in the entry block. + ; CHECK: CONVERGENCECTRL_ENTRY + %5:sgpr_64 = CONVERGENCECTRL_ENTRY + + bb.2: + ; CHECK: Convergence control tokens can only be used by convergent operations. + ; CHECK: G_PHI + %6:sgpr_64 = G_PHI %0:sgpr_64, %bb.0, %0:sgpr_64, %bb.1 + %7:sgpr_64 = CONVERGENCECTRL_ANCHOR + %8:sgpr_64 = IMPLICIT_DEF + %4:sgpr_64 = SI_CALL %8:sgpr_64, 1, implicit %7:sgpr_64 + ; CHECK: An operation can use at most one convergence control token. + ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2 + %9:sgpr_64 = SI_CALL %8:sgpr_64, 2, implicit %7:sgpr_64, implicit %7:sgpr_64 + ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function. + ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 3 + %10:sgpr_64 = SI_CALL %8:sgpr_64, 3 +... 
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir new file mode 100644 index 0000000..87cf3e6 --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir @@ -0,0 +1,52 @@ +# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +--- +name: cycles +body: | + bb.0: + %0:sgpr_64 = CONVERGENCECTRL_ANCHOR + %1:sgpr_64 = IMPLICIT_DEF + S_CBRANCH_EXECZ %bb.9, implicit $exec + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_EXECZ %bb.8, implicit $exec + S_BRANCH %bb.5 + + bb.2: + S_CBRANCH_EXECZ %bb.3, implicit $exec + S_BRANCH %bb.4 + + bb.3: + ; CHECK: Cycle heart must dominate all blocks in the cycle. + ; Irreducible cycle: entries(bb.4 bb.3) + %3:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 + S_BRANCH %bb.4 + + bb.4: + S_BRANCH %bb.3 + + bb.5: + S_CBRANCH_EXECZ %bb.6, implicit $exec + S_BRANCH %bb.2 + + bb.6: + S_BRANCH %bb.7 + + bb.7: + ; CHECK: Cycle heart must dominate all blocks in the cycle. + ; Reducible cycle: entries(bb.6) bb.7 + %4:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 + S_BRANCH %bb.6 + + bb.8: + ; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition. + %5:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 + %6:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64 + S_BRANCH %bb.8 + + bb.9: + ; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition. + %7:sgpr_64 = G_SI_CALL %1:sgpr_64, 3, implicit %0:sgpr_64 + S_BRANCH %bb.9 + +... 
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg new file mode 100644 index 0000000..7c49242 --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not "AMDGPU" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir new file mode 100644 index 0000000..c70a48b --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir @@ -0,0 +1,15 @@ +# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +--- +name: mixed2 +body: | + bb.0: + %0:sgpr_64 = IMPLICIT_DEF + %1:sgpr_64 = SI_CALL %0, 1 + ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function. + ; CHECK: CONVERGENCECTRL_ANCHOR + %2:sgpr_64 = CONVERGENCECTRL_ANCHOR + ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function. + ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2 + %3:sgpr_64 = SI_CALL %0, 2, implicit %2:sgpr_64 + +... diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir new file mode 100644 index 0000000..b3834f4 --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir @@ -0,0 +1,11 @@ +# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +--- +name: not_ssa +tracksRegLiveness: true +body: | + bb.0: + ; CHECK: Convergence control requires SSA. + %0:sgpr_64 = CONVERGENCECTRL_ANCHOR + %8:sgpr_64 = IMPLICIT_DEF + %8:sgpr_64 = IMPLICIT_DEF +... 
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir new file mode 100644 index 0000000..9e869ac --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir @@ -0,0 +1,24 @@ +# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +--- +name: region_nesting +body: | + bb.0: + %0:sgpr_64 = CONVERGENCECTRL_ANCHOR + %1:sgpr_64 = CONVERGENCECTRL_ANCHOR + %2:sgpr_64 = IMPLICIT_DEF + %3:sgpr_64 = SI_CALL %2, 1, implicit %0:sgpr_64 + ; CHECK: Convergence region is not well-nested. + ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2 + %4:sgpr_64 = SI_CALL %2, 2, implicit %1:sgpr_64 + S_CBRANCH_EXECZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + %5:sgpr_64 = SI_CALL %2, 3, implicit %0:sgpr_64 + + bb.2: + ; CHECK: Convergence region is not well-nested. + ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 4 + %6:sgpr_64 = SI_CALL %2, 4, implicit %1:sgpr_64 + +... |