From c7bb3665a1c4a06754e486d8567182821fa32b55 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 28 Jan 2022 14:42:18 +0000
Subject: [X86] SimplifyDemandedBitsForTargetNode - fold MOVMSK(YMM) ->
 MOVMSK(XMM)

If we don't demand the upper elements of the 256-bit vector, then just perform the MOVMSK on the lower 128-bit subvector instead.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'llvm/lib')

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 99ef69d..450e594 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41043,6 +41043,13 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
 
+    // See if we only demand bits from the lower 128-bit vector.
+    if (SrcVT.is256BitVector() &&
+        OriginalDemandedBits.getActiveBits() <= (NumElts / 2)) {
+      SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src));
+      return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
+    }
+
     // Only demand the vector elements of the sign bits we need.
     APInt KnownUndef, KnownZero;
     APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
-- 
cgit v1.1