diff options
author | Jay Foad <jay.foad@amd.com> | 2020-09-25 15:55:02 +0100 |
---|---|---|
committer | Jay Foad <jay.foad@amd.com> | 2020-10-05 09:55:10 +0100 |
commit | 0d5989bb24934802a9e6fcca63848a57a91efcc8 (patch) | |
tree | 056eb514ac884d3c6fb8015500eeed5b2d15a71f | |
parent | 64b879ae2a8a4a4e541404c19d96d18c4aed810e (diff) | |
download | llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.zip llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.tar.gz llvm-0d5989bb24934802a9e6fcca63848a57a91efcc8.tar.bz2 |
[AMDGPU] Split R600 and GCN bfe patterns
This is in preparation for making the GCN patterns divergence-aware.
NFC (no functional change intended).
Differential Revision: https://reviews.llvm.org/D88579
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 47 | 0 insertions, 47 deletions |
-rw-r--r-- | llvm/lib/Target/AMDGPU/EvergreenInstructions.td | 45 | 44 insertions, 1 deletion |
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 45 | 44 insertions, 1 deletion |
3 files changed, 88 insertions, 49 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 23e47c6..48b82ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -597,53 +597,6 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
   (vt rc:$addr)
 >;
 
-// Bitfield extract patterns
-
-def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
-  return isMask_32(Imm);
-}]>;
-
-def IMMPopCount : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
-                                   MVT::i32);
-}]>;
-
-multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
-  def : AMDGPUPat <
-    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
-    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
-  >;
-
-  // x & ((1 << y) - 1)
-  def : AMDGPUPat <
-    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x & ~(-1 << y)
-  def : AMDGPUPat <
-    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x & (-1 >> (bitwidth - y))
-  def : AMDGPUPat <
-    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x << (bitwidth - y) >> (bitwidth - y)
-  def : AMDGPUPat <
-    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  def : AMDGPUPat <
-    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
-    (SBFE $src, (MOV (i32 0)), $width)
-  >;
-}
-
 // fshr pattern
 class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
   (fshr i32:$src0, i32:$src1, i32:$src2),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index a2782bf..cd9c056 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -30,6 +30,15 @@ class EGOrCaymanPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
   let SubtargetPredicate = isEGorCayman;
 }
 
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Evergreen / Cayman store instructions
 //===----------------------------------------------------------------------===//
@@ -394,7 +403,41 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
   VecALU
 >;
 
-defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
+// Bitfield extract patterns
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (BFE_UINT_eg $src, $rshift, (MOV_IMM_I32 (i32 (IMMPopCount $mask))))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_INT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
 
 def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
   [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 817fa0b..d55cf0f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2315,7 +2315,50 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
 
-defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (V_BFE_U32 $src, $rshift, (i32 (IMMPopCount $mask)))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_I32 $src, (i32 0), $width)
+>;
 
 // SHA-256 Ma patterns
 
```