diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-03-28 19:49:02 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-04-02 18:38:37 +0100 |
commit | 8bc2d19c13c3a0aa0bffe82596f75f25bed5004f (patch) | |
tree | a1f8490a372e1a3149684483889fbbcd3a19880f /llvm/lib/Target | |
parent | 82be6e186b5f88779325c5ede99ac714b2cfc4fa (diff) | |
download | llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.zip llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.tar.gz llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.tar.bz2 |
[X86] canonicalizeShuffleWithOp - don't fold VPERMI(BINOP(X,Y)) -> BINOP(VPERMI(X),VPERMI(Y))
VPERMI (VPERMQ/VPERMPD) is nearly always lane-crossing and merges poorly with other target shuffles (i.e. anything other than another VPERMI).
For now, I've restricted VPERMI so that it only merges with itself, constants, loads and splats.
We might be able to merge with a few other special cases (e.g. AND/ANDNP with a constant?), which could help the AVX512VL regression in shuffle-vs-trunc-256.ll. However, since that test now gives similar codegen to the other AVX512 variants, I'd prefer to address it properly by improving the shuffle lowering.
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
1 files changed, 9 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 312e448..a9751e1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39961,8 +39961,10 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG, const SDLoc &DL) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT ShuffleVT = N.getValueType(); + unsigned Opc = N.getOpcode(); - auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) { + auto IsMergeableWithShuffle = [Opc, &DAG](SDValue Op, bool FoldShuf = true, + bool FoldLoad = false) { // AllZeros/AllOnes constants are freely shuffled and will peek through // bitcasts. Other constant build vectors do not peek through bitcasts. Only // merge with target shuffles if it has one use so shuffle combining is @@ -39972,8 +39974,9 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG, ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) || getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)) || + (Op.getOpcode() == Opc && Op->hasOneUse()) || (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) || - (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) || + (FoldShuf && isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) || (FoldLoad && isShuffleFoldableLoad(Op)) || DAG.isSplatValue(Op, /*AllowUndefs*/ false); }; @@ -39984,7 +39987,6 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG, (Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits()); }; - unsigned Opc = N.getOpcode(); switch (Opc) { // Unary and Unary+Permute Shuffles. 
case X86ISD::PSHUFB: { @@ -40010,8 +40012,10 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG, if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) { SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0)); SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1)); - if (IsMergeableWithShuffle(Op00, Opc != X86ISD::PSHUFB) || - IsMergeableWithShuffle(Op01, Opc != X86ISD::PSHUFB)) { + if (IsMergeableWithShuffle(Op00, Opc != X86ISD::VPERMI, + Opc != X86ISD::PSHUFB) || + IsMergeableWithShuffle(Op01, Opc != X86ISD::VPERMI, + Opc != X86ISD::PSHUFB)) { SDValue LHS, RHS; Op00 = DAG.getBitcast(ShuffleVT, Op00); Op01 = DAG.getBitcast(ShuffleVT, Op01); |