aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-12-16 11:48:51 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-12-16 11:48:51 +0000
commitf159a3414fc07793cd22bb1326b2e59f12059a29 (patch)
treea0c5793f8ab83986fd9c5924d906130b8ebf411e /llvm
parenta81719fbfca8e9a5cab00cbc425d9a1ffa3c1f89 (diff)
downloadllvm-f159a3414fc07793cd22bb1326b2e59f12059a29.zip
llvm-f159a3414fc07793cd22bb1326b2e59f12059a29.tar.gz
llvm-f159a3414fc07793cd22bb1326b2e59f12059a29.tar.bz2
[X86][SSE] Combine shuffles to MOVSS/MOVSD whatever the domain.
We already do the same thing in shuffle lowering; but don't do it if we have SSE41 (PBLEND) instead. llvm-svn: 289937
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp18
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining.ll8
2 files changed, 11 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2b496b04..1de40c1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26072,17 +26072,17 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
ShuffleVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {0, 3}) && FloatDomain) {
- if (Subtarget.hasSSE2()) {
- std::swap(V1, V2);
- Shuffle = X86ISD::MOVSD;
- ShuffleVT = MVT::v2f64;
- return true;
- }
+ if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
+ (FloatDomain || !Subtarget.hasSSE41())) {
+ std::swap(V1, V2);
+ Shuffle = X86ISD::MOVSD;
+ ShuffleVT = MaskVT;
+ return true;
}
- if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
+ (FloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
- ShuffleVT = MVT::v4f32;
+ ShuffleVT = MaskVT;
return true;
}
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 88f1202..a65d830 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1804,9 +1804,7 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test1c:
; SSE2: # BB#0:
-; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
@@ -1815,9 +1813,7 @@ define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
;
; SSSE3-LABEL: combine_test1c:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]