diff options
| author | Han-Kuan Chen <hankuan.chen@sifive.com> | 2024-08-27 16:11:52 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-27 16:11:52 +0800 |
| commit | 3d1c63ee2c2707be7150d105ead388cb2473a1cf (patch) | |
| tree | f3dd579cfe1c5d528320b10b741573ea4bb822bb | |
| parent | 57c1e21903d3446073af8c2fb4bbdea00dc7c953 (diff) | |
| download | llvm-3d1c63ee2c2707be7150d105ead388cb2473a1cf.zip llvm-3d1c63ee2c2707be7150d105ead388cb2473a1cf.tar.gz llvm-3d1c63ee2c2707be7150d105ead388cb2473a1cf.tar.bz2 | |
[SLP][REVEC] Expand getelementptr into vector form. (#103704)
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 21 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll | 43 |
2 files changed, 64 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ed47ed6..4da6521 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13765,6 +13765,27 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } + if (isa<FixedVectorType>(ScalarTy)) { + assert(SLPReVec && "FixedVectorType is not expected."); + // CreateMaskedGather expects VecTy and VecPtr have same size. We need + // to expand VecPtr if ScalarTy is a vector type. + unsigned ScalarTyNumElements = + cast<FixedVectorType>(ScalarTy)->getNumElements(); + unsigned VecTyNumElements = + cast<FixedVectorType>(VecTy)->getNumElements(); + assert(VecTyNumElements % ScalarTyNumElements == 0 && + "Cannot expand getelementptr."); + unsigned VF = VecTyNumElements / ScalarTyNumElements; + SmallVector<Constant *> Indices(VecTyNumElements); + transform(seq(VecTyNumElements), Indices.begin(), [=](unsigned I) { + return Builder.getInt64(I % ScalarTyNumElements); + }); + VecPtr = Builder.CreateGEP( + VecTy->getElementType(), + Builder.CreateShuffleVector( + VecPtr, createReplicatedMask(ScalarTyNumElements, VF)), + ConstantVector::get(Indices)); + } // Use the minimum alignment of the gathered loads. Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars); NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment); diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll new file mode 100644 index 0000000..d822a24 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s + +define i32 @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[IF_END_I87:%.*]] +; CHECK: if.end.i87: +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> poison, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP1]], <2 x i32> zeroinitializer, i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7> +; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [ +; CHECK-NEXT: i32 1, label [[SW_BB509_I]] +; CHECK-NEXT: i32 0, label [[IF_THEN458_I:%.*]] +; CHECK-NEXT: ] +; CHECK: if.then458.i: +; CHECK-NEXT: br label [[SW_BB509_I]] +; CHECK: sw.bb509.i: +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP0]], [[IF_THEN458_I]] ], [ [[TMP3]], [[IF_END_I87]] ], [ [[TMP3]], [[IF_END_I87]] ] +; CHECK-NEXT: ret i32 0 +; +entry: + %getelementptr0 = getelementptr i8, ptr null, i64 64036 + %getelementptr1 = getelementptr i8, ptr null, i64 64064 + br label %if.end.i87 + +if.end.i87: ; preds = %entry + %0 = load <2 x i32>, ptr %getelementptr0, align 4 + %1 = load <2 x i32>, ptr %getelementptr1, align 8 + switch i32 0, label %sw.bb509.i [ + i32 1, label %sw.bb509.i + i32 0, label %if.then458.i + ] + +if.then458.i: ; preds = %if.end.i87 + br label %sw.bb509.i + +sw.bb509.i: ; preds = %if.then458.i, %if.end.i87, %if.end.i87 + %4 = phi <2 x i32> [ %0, %if.then458.i ], [ %0, %if.end.i87 ], [ %0, %if.end.i87 ] + %5 = phi <2 x i32> [ %1, %if.then458.i ], [ zeroinitializer, %if.end.i87 ], [ zeroinitializer, %if.end.i87 ] + ret i32 0 +} |
