commit    5b936ec89eb13b6911571b4c917f780ee733eaf7
author    Simon Pilgrim <llvm-dev@redking.me.uk>  2018-08-19 17:47:50 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2018-08-19 17:47:50 +0000
tree      e26ae522087a40008343c116ef153b0e68c316f9
parent    b197667ebade63350382df9a7fd5cebb01bbd4d7
[SelectionDAG] Add basic demanded elements support to ComputeNumSignBits for BITCAST nodes
This only adds support for the existing 'large element' scalar/vector to 'small element' vector bitcasts. The next step would be to support cases where the large elements aren't all sign bits, and determine the small element equivalent based on the demanded elements.

llvm-svn: 340143
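To make the element mapping concrete, here is a minimal standalone sketch (not LLVM code; plain std::vector<bool> stands in for llvm::APInt, and the helper name is hypothetical) of how the patch folds demanded narrow elements back onto the wide source elements via i / Scale:

```cpp
// Each wide source element covers 'Scale' narrow elements after the bitcast,
// so a demanded narrow element i maps to wide source element i / Scale.
#include <cstdio>
#include <vector>

std::vector<bool> mapDemandedEltsToSrc(const std::vector<bool> &DemandedElts,
                                       unsigned Scale) {
  std::vector<bool> SrcDemandedElts(DemandedElts.size() / Scale, false);
  for (unsigned i = 0; i != DemandedElts.size(); ++i)
    if (DemandedElts[i])
      SrcDemandedElts[i / Scale] = true; // narrow elt i lives in wide elt i/Scale
  return SrcDemandedElts;
}

int main() {
  // Example: a v8i16 result of a bitcast from v2i64 gives Scale = 64 / 16 = 4.
  // Demanding narrow elements 0 and 5 requires wide source elements 0 and 1.
  std::vector<bool> Demanded = {true,  false, false, false,
                                false, true,  false, false};
  std::vector<bool> Src = mapDemandedEltsToSrc(Demanded, /*Scale=*/4);
  for (unsigned i = 0; i != Src.size(); ++i)
    std::printf("wide elt %u demanded: %s\n", i, Src[i] ? "yes" : "no");
  return 0;
}
```

If only a subset of the wide elements is demanded, ComputeNumSignBits on the source can then ignore the rest, which is what lets the packss.ll tests below simplify to packssdw.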
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  9
-rw-r--r--  llvm/test/CodeGen/X86/packss.ll                | 70
2 files changed, 36 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f93a2af..7b28a06d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3245,7 +3245,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Requires handling of DemandedElts and Endianness.
if ((SrcBits % VTBits) == 0) {
assert(Op.getValueType().isVector() && "Expected bitcast to vector");
- Tmp = ComputeNumSignBits(N0, Depth + 1);
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts(NumElts / Scale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
if (Tmp == SrcBits)
return VTBits;
}
diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll
index 3723ab4..a062365 100644
--- a/llvm/test/CodeGen/X86/packss.ll
+++ b/llvm/test/CodeGen/X86/packss.ll
@@ -107,29 +107,25 @@ define <8 x i16> @trunc_ashr_v4i32_icmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: movdqa %xmm1, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
-; X86-SSE-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE-NEXT: movdqa %xmm1, %xmm3
; X86-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X86-SSE-NEXT: movdqa %xmm1, %xmm2
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
-; X86-SSE-NEXT: movdqa %xmm1, %xmm4
+; X86-SSE-NEXT: movdqa %xmm0, %xmm4
; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X86-SSE-NEXT: psrlq $63, %xmm4
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.940656e-324,-0.000000e+00]
-; X86-SSE-NEXT: xorpd %xmm2, %xmm1
-; X86-SSE-NEXT: psubq %xmm2, %xmm1
-; X86-SSE-NEXT: psrlq $63, %xmm3
-; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X86-SSE-NEXT: xorpd %xmm2, %xmm0
; X86-SSE-NEXT: psubq %xmm2, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-SSE-NEXT: psrlq $63, %xmm3
+; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X86-SSE-NEXT: xorpd %xmm2, %xmm1
+; X86-SSE-NEXT: psubq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-SSE-NEXT: pslld $16, %xmm1
-; X86-SSE-NEXT: psrad $16, %xmm1
-; X86-SSE-NEXT: pslld $16, %xmm0
-; X86-SSE-NEXT: psrad $16, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: packssdw %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
@@ -151,10 +147,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X86-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
@@ -167,37 +160,33 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
-; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X86-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: movdqa %xmm1, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: movdqa %xmm1, %xmm3
; X64-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm2
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
; X64-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X64-SSE-NEXT: psrlq $63, %xmm4
-; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X64-SSE-NEXT: movapd {{.*#+}} xmm2 = [1,9223372036854775808]
-; X64-SSE-NEXT: xorpd %xmm2, %xmm1
-; X64-SSE-NEXT: psubq %xmm2, %xmm1
-; X64-SSE-NEXT: psrlq $63, %xmm3
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X64-SSE-NEXT: xorpd %xmm2, %xmm0
; X64-SSE-NEXT: psubq %xmm2, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-SSE-NEXT: psrlq $63, %xmm3
+; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X64-SSE-NEXT: xorpd %xmm2, %xmm1
+; X64-SSE-NEXT: psubq %xmm2, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-SSE-NEXT: pslld $16, %xmm1
-; X64-SSE-NEXT: psrad $16, %xmm1
-; X64-SSE-NEXT: pslld $16, %xmm0
-; X64-SSE-NEXT: psrad $16, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: packssdw %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
@@ -220,10 +209,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X64-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
@@ -234,9 +220,9 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X64-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; X64-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
-; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X64-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X64-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>