diff options
author | Bjorn Pettersson <bjorn.a.pettersson@ericsson.com> | 2020-12-28 14:28:55 +0100 |
---|---|---|
committer | Bjorn Pettersson <bjorn.a.pettersson@ericsson.com> | 2021-01-14 11:30:33 +0100 |
commit | d58512b2e31a255dccc2c9a351a4e47b2b4c9f79 (patch) | |
tree | a5af8b9c601179d96aed6739b6d3bf9bc1d66f4e | |
parent | 53e3b81faaf32a495189182e0e4d635cbe19c5dd (diff) | |
download | llvm-d58512b2e31a255dccc2c9a351a4e47b2b4c9f79.zip llvm-d58512b2e31a255dccc2c9a351a4e47b2b4c9f79.tar.gz llvm-d58512b2e31a255dccc2c9a351a4e47b2b4c9f79.tar.bz2 |
[SLP] Don't vectorize stores of non-packed types (like i1, i2)
In the spirit of commit fc783e91e0c0696e (llvm-svn: 248943) we
shouldn't vectorize stores of non-packed types (i.e. types that
have padding between consecutive variables in a scalar layout,
but are packed in a vector layout).
The problem was detected as a miscompile in a downstream test case.
Reviewed By: anton-afanasyev
Differential Revision: https://reviews.llvm.org/D94446
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll | 11 |
2 files changed, 14 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b3a3d65..0f3f74b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3094,6 +3094,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, case Instruction::Store: { // Check if the stores are consecutive or if we need to swizzle them. llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType(); + // Avoid types that are padded when being allocated as scalars, while + // being packed together in a vector (such as i1). + if (DL->getTypeSizeInBits(ScalarTy) != + DL->getTypeAllocSizeInBits(ScalarTy)) { + BS.cancelScheduling(VL, VL0); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n"); + return; + } // Make sure all stores in the bundle are simple - we can't vectorize // atomic or volatile stores. SmallVector<Value *, 4> PointerOps(VL.size()); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll index 93a97c3..8c0b9b1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll @@ -113,7 +113,6 @@ declare void @f(i64, i64) define void @test4(i32 %a, i28* %ptr) { ; Check that we do not vectorize types that are padded to a bigger ones. -; FIXME: This is not correct! See D94446. 
; ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: @@ -121,12 +120,10 @@ define void @test4(i32 %a, i28* %ptr) { ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i28, i28* [[PTR:%.*]], i32 1 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i28, i28* [[PTR]], i32 2 ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i28, i28* [[PTR]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i28> poison, i28 [[TRUNC]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i28> [[TMP0]], i28 [[TRUNC]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i28> [[TMP1]], i28 [[TRUNC]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i28> [[TMP2]], i28 [[TRUNC]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i28* [[PTR]] to <4 x i28>* -; CHECK-NEXT: store <4 x i28> [[TMP3]], <4 x i28>* [[TMP4]], align 4 +; CHECK-NEXT: store i28 [[TRUNC]], i28* [[PTR]], align 4 +; CHECK-NEXT: store i28 [[TRUNC]], i28* [[GEP1]], align 4 +; CHECK-NEXT: store i28 [[TRUNC]], i28* [[GEP2]], align 4 +; CHECK-NEXT: store i28 [[TRUNC]], i28* [[GEP3]], align 4 ; CHECK-NEXT: ret void ; entry: |