[X86] canonicalizeShuffleWithOp - don't fold VPERMI(BINOP(X,Y)) -> BINOP(VPERMI(X),VPERMI(Y))

VPERMI (VPERMQ/VPERMPD) is nearly always lane-crossing and merges poorly with target shuffles (other than itself). For now, I've restricted VPERMI to merge only with itself, constants, loads and splats. We might be able to merge with a few other special cases (AND/ANDNP with a constant?), which could help the shuffle-vs-trunc-256.ll AVX512VL regression, but since that test now gives similar codegen to the other AVX512 variants, I'd prefer to address it properly by improving the shuffle lowering.
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2024-03-28 19:49:02 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2024-04-02 18:38:37 +0100
commit: 8bc2d19c13c3a0aa0bffe82596f75f25bed5004f (patch)
tree: a1f8490a372e1a3149684483889fbbcd3a19880f /llvm/lib/Target
parent: 82be6e186b5f88779325c5ede99ac714b2cfc4fa (diff)
download: llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.zip
llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.tar.gz
llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.tar.bz2
1 files changed, 9 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 312e448..a9751e1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39961,8 +39961,10 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
                                          const SDLoc &DL) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT ShuffleVT = N.getValueType();
+  unsigned Opc = N.getOpcode();
 
-  auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) {
+  auto IsMergeableWithShuffle = [Opc, &DAG](SDValue Op, bool FoldShuf = true,
+                                            bool FoldLoad = false) {
     // AllZeros/AllOnes constants are freely shuffled and will peek through
     // bitcasts. Other constant build vectors do not peek through bitcasts. Only
     // merge with target shuffles if it has one use so shuffle combining is
@@ -39972,8 +39974,9 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
            ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
            ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
            getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)) ||
+           (Op.getOpcode() == Opc && Op->hasOneUse()) ||
            (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) ||
-           (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
+           (FoldShuf && isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
            (FoldLoad && isShuffleFoldableLoad(Op)) ||
            DAG.isSplatValue(Op, /*AllowUndefs*/ false);
   };
@@ -39984,7 +39987,6 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
            (Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits());
   };
 
-  unsigned Opc = N.getOpcode();
   switch (Opc) {
   // Unary and Unary+Permute Shuffles.
   case X86ISD::PSHUFB: {
@@ -40010,8 +40012,10 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
       if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
         SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
         SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
-        if (IsMergeableWithShuffle(Op00, Opc != X86ISD::PSHUFB) ||
-            IsMergeableWithShuffle(Op01, Opc != X86ISD::PSHUFB)) {
+        if (IsMergeableWithShuffle(Op00, Opc != X86ISD::VPERMI,
+                                   Opc != X86ISD::PSHUFB) ||
+            IsMergeableWithShuffle(Op01, Opc != X86ISD::VPERMI,
+                                   Opc != X86ISD::PSHUFB)) {
           SDValue LHS, RHS;
           Op00 = DAG.getBitcast(ShuffleVT, Op00);
           Op01 = DAG.getBitcast(ShuffleVT, Op01);
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2024-03-28 19:49:02 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2024-04-02 18:38:37 +0100
commit	8bc2d19c13c3a0aa0bffe82596f75f25bed5004f (patch)
tree	a1f8490a372e1a3149684483889fbbcd3a19880f /llvm/lib/Target
parent	82be6e186b5f88779325c5ede99ac714b2cfc4fa (diff)
download	llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.zip llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.tar.gz llvm-8bc2d19c13c3a0aa0bffe82596f75f25bed5004f.tar.bz2