[AMDGPU] Split R600 and GCN bfe patterns

This is in preparation for making the GCN patterns divergence-aware.
NFC (no functional change).

Differential Revision: https://reviews.llvm.org/D88579
author: Jay Foad <jay.foad@amd.com> 2020-09-25 15:55:02 +0100
committer: Jay Foad <jay.foad@amd.com> 2020-10-05 09:55:10 +0100
commit: 0d5989bb24934802a9e6fcca63848a57a91efcc8 (patch)
tree: 056eb514ac884d3c6fb8015500eeed5b2d15a71f
parent: 64b879ae2a8a4a4e541404c19d96d18c4aed810e (diff)
download: llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.zip
llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.tar.gz
llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.tar.bz2
3 files changed, 88 insertions, 49 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 23e47c6..48b82ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -597,53 +597,6 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
   (vt rc:$addr)
 >;
 
-// Bitfield extract patterns
-
-def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
-  return isMask_32(Imm);
-}]>;
-
-def IMMPopCount : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
-                                   MVT::i32);
-}]>;
-
-multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
-  def : AMDGPUPat <
-    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
-    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
-  >;
-
-  // x & ((1 << y) - 1)
-  def : AMDGPUPat <
-    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x & ~(-1 << y)
-  def : AMDGPUPat <
-    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x & (-1 >> (bitwidth - y))
-  def : AMDGPUPat <
-    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x << (bitwidth - y) >> (bitwidth - y)
-  def : AMDGPUPat <
-    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  def : AMDGPUPat <
-    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
-    (SBFE $src, (MOV (i32 0)), $width)
-  >;
-}
-
 // fshr pattern
 class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
   (fshr i32:$src0, i32:$src1, i32:$src2),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index a2782bf..cd9c056 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -30,6 +30,15 @@ class EGOrCaymanPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
   let SubtargetPredicate = isEGorCayman;
 }
 
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Evergreen / Cayman store instructions
 //===----------------------------------------------------------------------===//
@@ -394,7 +403,41 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
   VecALU
 >;
 
-defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
+// Bitfield extract patterns
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (BFE_UINT_eg $src, $rshift, (MOV_IMM_I32 (i32 (IMMPopCount $mask))))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_INT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
 
 def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
   [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 817fa0b..d55cf0f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2315,7 +2315,50 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
 
-defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (V_BFE_U32 $src, $rshift, (i32 (IMMPopCount $mask)))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_I32 $src, (i32 0), $width)
+>;
 
 // SHA-256 Ma patterns
author	Jay Foad <jay.foad@amd.com>	2020-09-25 15:55:02 +0100
committer	Jay Foad <jay.foad@amd.com>	2020-10-05 09:55:10 +0100
commit	0d5989bb24934802a9e6fcca63848a57a91efcc8 (patch)
tree	056eb514ac884d3c6fb8015500eeed5b2d15a71f
parent	64b879ae2a8a4a4e541404c19d96d18c4aed810e (diff)
download	llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.zip llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.tar.gz llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.tar.bz2