diff options
author | Craig Topper <craig.topper@intel.com> | 2020-01-07 11:09:33 -0800 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2020-01-07 11:22:04 -0800 |
commit | afa8211e979c25100c2ed41d8da1e18b45d0ef2b (patch) | |
tree | bfab69376fbbd6763e7257f8c2b72c6e4eeb46a6 | |
parent | b9376690a011765e35d9ca63abe0e7117985f1ed (diff) | |
download | llvm-afa8211e979c25100c2ed41d8da1e18b45d0ef2b.zip llvm-afa8211e979c25100c2ed41d8da1e18b45d0ef2b.tar.gz llvm-afa8211e979c25100c2ed41d8da1e18b45d0ef2b.tar.bz2 |
[X86] Improve lowering of (v2i64 (setgt X, -1)) on pre-SSE4.2 targets. Enable v2i64 in foldVectorXorShiftIntoCmp.
Similar to D72302, but for the canonical form of the opposite case. I've changed foldVectorXorShiftIntoCmp to form a target-independent setcc node instead of a PCMPGT node, and enabled it for v2i64 on pre-SSE4.2 targets. The setcc should eventually get lowered to PCMPGT or the new v2i64 sequence.
Differential Revision: https://reviews.llvm.org/D72318
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-pcmp.ll | 17 |
2 files changed, 21 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 43ce2de..806c7e1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21597,6 +21597,17 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, return DAG.getBitcast(VT, Result); } + if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) { + Op0 = DAG.getBitcast(MVT::v4i32, Op0); + Op1 = DAG.getConstant(-1, dl, MVT::v4i32); + + SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); + static const int MaskHi[] = { 1, 1, 3, 3 }; + SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); + + return DAG.getBitcast(VT, Result); + } + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. The lower // compare is always unsigned. @@ -40814,8 +40825,8 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, default: return SDValue(); case MVT::v16i8: case MVT::v8i16: - case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break; - case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break; + case MVT::v4i32: + case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -40839,7 +40850,7 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, // Create a greater-than comparison against -1. We don't use the more obvious // greater-than-or-equal-to-zero because SSE/AVX don't have that instruction. 
- return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); + return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT); } /// Detect patterns of truncation with unsigned saturation: diff --git a/llvm/test/CodeGen/X86/vector-pcmp.ll b/llvm/test/CodeGen/X86/vector-pcmp.ll index 89eaad8..2900ce2 100644 --- a/llvm/test/CodeGen/X86/vector-pcmp.ll +++ b/llvm/test/CodeGen/X86/vector-pcmp.ll @@ -61,10 +61,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %x) { define <2 x i64> @test_pcmpgtq(<2 x i64> %x) { ; SSE2-LABEL: test_pcmpgtq: ; SSE2: # %bb.0: -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_pcmpgtq: @@ -187,13 +186,11 @@ define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) { define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) { ; SSE2-LABEL: test_pcmpgtq_256: ; SSE2: # %bb.0: -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_pcmpgtq_256: |