aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-09-17 20:32:45 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-09-17 20:32:45 +0000
commit61116ddc7bd7953fd90455927f8ca60e3fd713c2 (patch)
tree48c002732fb9db84da254dfdce31bd886dea1585 /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
parentb83278687dcab59646e43e6f6907d8cef7585692 (diff)
downloadllvm-61116ddc7bd7953fd90455927f8ca60e3fd713c2.zip
llvm-61116ddc7bd7953fd90455927f8ca60e3fd713c2.tar.gz
llvm-61116ddc7bd7953fd90455927f8ca60e3fd713c2.tar.bz2
[InstCombine] Added vector demanded bits support for SSE4A EXTRQ/INSERTQ instructions
The SSE4A instructions EXTRQ/INSERTQ only use the lower 64-bits (or less) for many of their input vector operands and all of them have undefined upper 64-bits results. Differential Revision: http://reviews.llvm.org/D12680 llvm-svn: 247934
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp11
1 files changed, 10 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 142e071..c68a031 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -412,7 +412,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Value *LHS, *RHS;
if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
-
+
if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
@@ -1237,6 +1237,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
break;
+
+ // SSE4A instructions leave the upper 64-bits of the 128-bit result
+ // in an undefined state.
+ case Intrinsic::x86_sse4a_extrq:
+ case Intrinsic::x86_sse4a_extrqi:
+ case Intrinsic::x86_sse4a_insertq:
+ case Intrinsic::x86_sse4a_insertqi:
+ UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
+ break;
}
break;
}