aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
author: Pierre van Houtryve <pierre.vanhoutryve@amd.com>, 2023-01-24 10:33:29 +0100
committer: pvanhout <pierre.vanhoutryve@amd.com>, 2023-02-10 08:34:23 +0100
commit: d9a6fc82f56f1e4ebb6ba053a57a5839c3907a7e (patch)
tree: 0e645a2535d14b97d78b3caec02a320edd8aceb6 /llvm/lib
parent: 90f5176ab2c6f46449c9a7050f7269a7356f7a41 (diff)
download: llvm-d9a6fc82f56f1e4ebb6ba053a57a5839c3907a7e.zip
          llvm-d9a6fc82f56f1e4ebb6ba053a57a5839c3907a7e.tar.gz
          llvm-d9a6fc82f56f1e4ebb6ba053a57a5839c3907a7e.tar.bz2
[AMDGPU] Run unmerge combines post regbankselect
RegBankSelect can insert G_UNMERGE_VALUES in a lot of places, which leaves us with a lot of unmerge/merge pairs that could be simplified. These often got in the way of pattern matching and made codegen worse.

This patch:
- Makes the necessary changes to the merge/unmerge combines so they can run post-RegBankSelect.
- Adds the relevant unmerge combines to the list of RegBankSelect combines for AMDGPU.
- Updates some tablegen patterns that were missing explicit cross-regbank copies (V_BFI patterns were causing constant bus violations with this change).

This seems to be mostly beneficial for code quality.

Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D142192
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 9
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td | 13
3 files changed, 21 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 7b85f2e..5fc2674 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1763,6 +1763,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Register SrcReg = Operands[Idx];
+
+ // This combine may run after RegBankSelect, so we need to be aware of
+ // register banks.
+ const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+ SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+ MRI.setRegClassOrRegBank(SrcReg, DstCB);
+ }
+
if (CanReuseInputDirectly)
replaceRegWith(MRI, DstReg, SrcReg);
else
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c11d465..539b566 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -127,7 +127,8 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
def AMDGPURegBankCombinerHelper : GICombinerHelper<
"AMDGPUGenRegBankCombinerHelper",
- [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
+ [unmerge_merge, unmerge_cst, unmerge_undef,
+ zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2c10cdc..012459f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2055,7 +2055,9 @@ def BFIImm32 : PatFrag<
// (y & x) | (z & ~x)
def : AMDGPUPat <
(DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
- (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+ (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
>;
// (y & C) | (z & ~C)
@@ -2080,7 +2082,9 @@ def : AMDGPUPat <
// z ^ (x & (y ^ z))
def : AMDGPUPat <
(DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
- (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+ (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
>;
// 64-bit version
@@ -3196,7 +3200,10 @@ def : AMDGPUPat <
def : AMDGPUPat <
(DivergentBinFrag<or> (and i32:$x, i32:$z),
(and i32:$y, (or i32:$x, i32:$z))),
- (V_BFI_B32_e64 (V_XOR_B32_e64 VSrc_b32:$x, VSrc_b32:$y), VSrc_b32:$z, VSrc_b32:$y)
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32)),
+ (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32))
>;
def : AMDGPUPat <