aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2021-12-22 16:57:44 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2021-12-22 16:57:44 +0000
commit4639461531e7325458dc6a37bc6d857c0df109f7 (patch)
tree540c00e218e10d14e605c89a1f13459854f410b7
parent09b53296cf1649d6f953e71d1c3cd970ad74dde8 (diff)
downloadllvm-4639461531e7325458dc6a37bc6d857c0df109f7.zip
llvm-4639461531e7325458dc6a37bc6d857c0df109f7.tar.gz
llvm-4639461531e7325458dc6a37bc6d857c0df109f7.tar.bz2
[DAG][X86] Add TargetLowering::isSplatValueForTargetNode override
Add callback to enable us to test target nodes if they are splat vectors. Added some basic X86ISD::VBROADCAST + X86ISD::VBROADCAST_LOAD handling.
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp13
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp22
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--llvm/test/CodeGen/X86/pr15296.ll22
-rw-r--r--llvm/test/CodeGen/X86/vector-shift-ashr-256.ll44
7 files changed, 78 insertions, 46 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 83ce3d0..b2d82e0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3620,6 +3620,13 @@ public:
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const;
+
+ /// Return true if vector \p Op has the same value across all \p DemandedElts,
+ /// indicating any elements which may be undef in the output \p UndefElts.
+ virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth = 0) const;
+
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0e2fe8b..2ae0d4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2500,6 +2500,7 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
/// they are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt &UndefElts, unsigned Depth) const {
+ unsigned Opcode = V.getOpcode();
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
@@ -2511,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
// Deal with some common cases here that work for both fixed and scalable
// vector types.
- switch (V.getOpcode()) {
+ switch (Opcode) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
? APInt::getAllOnes(DemandedElts.getBitWidth())
@@ -2537,7 +2538,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1);
- }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID)
+ return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth);
+ break;
+}
// We don't support other cases than those above for scalable vectors at
// the moment.
@@ -2548,7 +2554,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
UndefElts = APInt::getZero(NumElts);
- switch (V.getOpcode()) {
+ switch (Opcode) {
case ISD::BUILD_VECTOR: {
SDValue Scl;
for (unsigned i = 0; i != NumElts; ++i) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3d29e3f..4a0b23f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3136,6 +3136,19 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return false;
}
+bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isSplatValue if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c83bee..ef84904 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41050,6 +41050,28 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth);
}
+bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const {
+ unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+
+ switch (Opc) {
+ case X86ISD::VBROADCAST:
+ case X86ISD::VBROADCAST_LOAD:
+ // TODO: Permit vXi64 types on 32-bit targets.
+ if (isTypeLegal(Op.getValueType().getVectorElementType())) {
+ UndefElts = APInt::getNullValue(NumElts);
+ return true;
+ }
+ return false;
+ }
+
+ return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
+ Depth);
+};
+
// Helper to peek through bitops/trunc/setcc to determine size of source vector.
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2ea3477..d1d6e31 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1159,6 +1159,10 @@ namespace llvm {
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const override;
+ bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
+ APInt &UndefElts,
+ unsigned Depth) const override;
+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
SDValue unwrapAddress(SDValue N) const override;
diff --git a/llvm/test/CodeGen/X86/pr15296.ll b/llvm/test/CodeGen/X86/pr15296.ll
index 71034f6..8476b76 100644
--- a/llvm/test/CodeGen/X86/pr15296.ll
+++ b/llvm/test/CodeGen/X86/pr15296.ll
@@ -27,27 +27,11 @@ define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i
; CHECK-LABEL: shiftInput___canonical:
; CHECK: # %bb.0: # %allocas
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT: vpsrld %xmm2, %xmm3, %xmm4
-; CHECK-NEXT: vpsrlq $32, %xmm1, %xmm5
-; CHECK-NEXT: vpsrld %xmm5, %xmm3, %xmm6
-; CHECK-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7]
-; CHECK-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; CHECK-NEXT: vpblendw {{.*#+}} xmm6 = xmm1[0,1],xmm6[2,3,4,5,6,7]
-; CHECK-NEXT: vpsrld %xmm6, %xmm3, %xmm7
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; CHECK-NEXT: vpsrld %xmm1, %xmm3, %xmm3
-; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7]
-; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
-; CHECK-NEXT: vpsrld %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vpsrld %xmm5, %xmm0, %xmm4
-; CHECK-NEXT: vpsrld %xmm6, %xmm0, %xmm5
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
+; CHECK-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm2[4,5,6,7]
-; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm5[4,5,6,7]
-; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retl
allocas:
%smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 0d0f410..e6c802e 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2203,39 +2203,35 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX1-LABEL: PR52719:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm2
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero
-; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
+; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT: # xmm4 = mem[0,0]
-; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3,4,5,6,7]
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm5
; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm6
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1,2,3],xmm5[4,5,6,7]
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
-; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm7, %xmm3
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm7, %xmm7
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm7[0,1,2,3],xmm3[4,5,6,7]
-; X86-AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpsubq %xmm5, %xmm3, %xmm3
-; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm4
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X86-AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm6, %xmm1
+; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm6, %xmm2
+; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsubq %xmm5, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm2
+; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl