aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorpeterbell10 <peterbell10@openai.com>2024-11-20 21:06:57 +0000
committerGitHub <noreply@github.com>2024-11-20 13:06:57 -0800
commita3f2e01c95df67126ab5a75eca1b47e207486bee (patch)
treefe2ec9d479236e212bc2654bf603c1669624bffb /llvm/lib
parente468653ee77cd8e0268e5e8d83d5430114f1f4c8 (diff)
downloadllvm-a3f2e01c95df67126ab5a75eca1b47e207486bee.zip
llvm-a3f2e01c95df67126ab5a75eca1b47e207486bee.tar.gz
llvm-a3f2e01c95df67126ab5a75eca1b47e207486bee.tar.bz2
[InstCombine] Only fold extract element to trunc if vector `hasOneUse` (#115627)
This fixes a missed optimization caused by the `foldBitcastExtElt` pattern interfering with other combine patterns. In the case I was hitting, we have IR that combines two vectors into a new larger vector by extracting elements and inserting them into the new vector.

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %a0 = extractelement <2 x half> %avec, i32 0
  %a1 = extractelement <2 x half> %avec, i32 1
  %bvec = bitcast i32 %b to <2 x half>
  %b0 = extractelement <2 x half> %bvec, i32 0
  %b1 = extractelement <2 x half> %bvec, i32 1
  %ins0 = insertelement <4 x half> undef, half %a0, i32 0
  %ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
  %ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
  %ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
  ret <4 x half> %ins3
}
```

With the current behavior, `InstCombine` converts each vector extract sequence to

```llvm
  %tmp = trunc i32 %a to i16
  %a0 = bitcast i16 %tmp to half
  %a1 = extractelement <2 x half> %avec, i32 1
```

where the extraction of `%a0` is now done by truncating the original integer. While on its own this is fairly reasonable, in this case it also blocks the pattern which converts `extractelement` - `insertelement` into shuffles which gives the overall simpler result:

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %bvec = bitcast i32 %b to <2 x half>
  %ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %ins3
}
```

In this PR I fix the conflict by obeying the `hasOneUse` check even if there is no shift instruction required. In these cases we can't remove the vector completely, so the pattern has less benefit anyway. Also fwiw, I think dropping the `hasOneUse` check for the 0th element might have been a mistake in the first place.
Looking at https://github.com/llvm/llvm-project/commit/535c5d56a7bc9966036a11362d8984983a4bf090 the commit message only mentions loosening the `isDesirableIntType` requirement and doesn't mention changing the `hasOneUse` check at all.
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp6
1 files changed, 3 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ede89b0..f56414a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -205,9 +205,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
if (IsBigEndian)
ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
unsigned ShiftAmountC = ExtIndexC * DestWidth;
- if (!ShiftAmountC ||
- (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
- Ext.getVectorOperand()->hasOneUse())) {
+ if ((!ShiftAmountC ||
+ isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
+ Ext.getVectorOperand()->hasOneUse()) {
if (ShiftAmountC)
X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
if (DestTy->isFloatingPointTy()) {