author    Han-Kuan Chen <hankuan.chen@sifive.com>    2025-04-15 23:03:55 +0800
committer GitHub <noreply@github.com>                2025-04-15 23:03:55 +0800
commit    bcfc9f452983f9b0b14f2cda8f4d212a0821c972 (patch)
tree      8d6bbc9fec379c693802c9b549aee9951b1b2385 /llvm/lib
parent    9ab2dea61eee82faae8b85c99e889026e8b4f2c1 (diff)
[SLP][REVEC] VectorValuesAndScales should be supported by REVEC. (#135762)
We should align REVEC with the SLP algorithm as closely as possible: for example, by applying REVEC-specific handling when calling IRBuilder's Create methods, performing cost analysis via TTI, and expanding shuffle masks using transformScalarShuffleIndicesToVector.

Reference commit: 3b18d47ecbaba4e519ebf0d1bc134a404a56a9da
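As an illustration of the per-lane lowering this patch centralizes in createSingleOp (see the comment block in the second hunk below), here is a minimal standalone C++ sketch. strideMask is a hypothetical stand-in for llvm::createStrideMask, the element values are invented, and no LLVM headers are required:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for llvm::createStrideMask(Start, Stride, VF):
// the mask picks elements Start, Start + Stride, Start + 2*Stride, ...,
// VF entries in total.
static std::vector<int> strideMask(unsigned Start, unsigned Stride,
                                   unsigned VF) {
  std::vector<int> Mask;
  for (unsigned I = 0; I < VF; ++I)
    Mask.push_back(Start + I * Stride);
  return Mask;
}

int main() {
  // Two <4 x Ty> reduced values <a,b,c,d> and <e,f,g,h> concatenated
  // into one <8 x Ty> vectorized root. Lane I gathers element I of each
  // value, then each lane is reduced independently:
  //   Lane[0] mask {0,4} -> <a,e>, Lane[1] mask {1,5} -> <b,f>, ...
  for (unsigned Lane = 0; Lane < 4; ++Lane) {
    std::vector<int> Mask = strideMask(Lane, /*Stride=*/4, /*VF=*/2);
    std::printf("Lane[%u] mask:", Lane);
    for (int Idx : Mask)
      std::printf(" %d", Idx);
    std::printf("\n");
  }
  return 0;
}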
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 137
1 file changed, 64 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f7c37d8..cc775e4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22144,53 +22144,16 @@ public:
}
Type *ScalarTy = VL.front()->getType();
- if (isa<FixedVectorType>(ScalarTy)) {
- assert(SLPReVec && "FixedVectorType is not expected.");
- unsigned ScalarTyNumElements = getNumElements(ScalarTy);
- Value *ReducedSubTree = PoisonValue::get(
- getWidenedType(ScalarTy->getScalarType(), ScalarTyNumElements));
- for (unsigned I : seq<unsigned>(ScalarTyNumElements)) {
- // Do reduction for each lane.
- // e.g., do reduce add for
- // VL[0] = <4 x Ty> <a, b, c, d>
- // VL[1] = <4 x Ty> <e, f, g, h>
- // Lane[0] = <2 x Ty> <a, e>
- // Lane[1] = <2 x Ty> <b, f>
- // Lane[2] = <2 x Ty> <c, g>
- // Lane[3] = <2 x Ty> <d, h>
- // result[0] = reduce add Lane[0]
- // result[1] = reduce add Lane[1]
- // result[2] = reduce add Lane[2]
- // result[3] = reduce add Lane[3]
- SmallVector<int, 16> Mask =
- createStrideMask(I, ScalarTyNumElements, VL.size());
- Value *Lane = Builder.CreateShuffleVector(VectorizedRoot, Mask);
- Value *Val =
- createSingleOp(Builder, *TTI, Lane,
- OptReusedScalars && SameScaleFactor
- ? SameValuesCounter.front().second
- : 1,
- Lane->getType()->getScalarType() !=
- VL.front()->getType()->getScalarType()
- ? V.isSignedMinBitwidthRootNode()
- : true,
- RdxRootInst->getType());
- ReducedSubTree =
- Builder.CreateInsertElement(ReducedSubTree, Val, I);
- }
- VectorizedTree = GetNewVectorizedTree(VectorizedTree, ReducedSubTree);
- } else {
- Type *VecTy = VectorizedRoot->getType();
- Type *RedScalarTy = VecTy->getScalarType();
- VectorValuesAndScales.emplace_back(
- VectorizedRoot,
- OptReusedScalars && SameScaleFactor
- ? SameValuesCounter.front().second
- : 1,
- RedScalarTy != ScalarTy->getScalarType()
- ? V.isSignedMinBitwidthRootNode()
- : true);
- }
+ Type *VecTy = VectorizedRoot->getType();
+ Type *RedScalarTy = VecTy->getScalarType();
+ VectorValuesAndScales.emplace_back(
+ VectorizedRoot,
+ OptReusedScalars && SameScaleFactor
+ ? SameValuesCounter.front().second
+ : 1,
+ RedScalarTy != ScalarTy->getScalarType()
+ ? V.isSignedMinBitwidthRootNode()
+ : true);
// Count vectorized reduced values to exclude them from final reduction.
for (Value *RdxVal : VL) {
@@ -22363,9 +22326,35 @@ private:
Value *createSingleOp(IRBuilderBase &Builder, const TargetTransformInfo &TTI,
Value *Vec, unsigned Scale, bool IsSigned,
Type *DestTy) {
- Value *Rdx = emitReduction(Vec, Builder, &TTI, DestTy);
- if (Rdx->getType() != DestTy->getScalarType())
- Rdx = Builder.CreateIntCast(Rdx, DestTy->getScalarType(), IsSigned);
+ Value *Rdx;
+ if (auto *VecTy = dyn_cast<FixedVectorType>(DestTy)) {
+ unsigned DestTyNumElements = getNumElements(VecTy);
+ unsigned VF = getNumElements(Vec->getType()) / DestTyNumElements;
+ Rdx = PoisonValue::get(
+ getWidenedType(Vec->getType()->getScalarType(), DestTyNumElements));
+ for (unsigned I : seq<unsigned>(DestTyNumElements)) {
+ // Do reduction for each lane.
+ // e.g., do reduce add for
+ // VL[0] = <4 x Ty> <a, b, c, d>
+ // VL[1] = <4 x Ty> <e, f, g, h>
+ // Lane[0] = <2 x Ty> <a, e>
+ // Lane[1] = <2 x Ty> <b, f>
+ // Lane[2] = <2 x Ty> <c, g>
+ // Lane[3] = <2 x Ty> <d, h>
+ // result[0] = reduce add Lane[0]
+ // result[1] = reduce add Lane[1]
+ // result[2] = reduce add Lane[2]
+ // result[3] = reduce add Lane[3]
+ SmallVector<int, 16> Mask = createStrideMask(I, DestTyNumElements, VF);
+ Value *Lane = Builder.CreateShuffleVector(Vec, Mask);
+ Rdx = Builder.CreateInsertElement(
+ Rdx, emitReduction(Lane, Builder, &TTI, DestTy), I);
+ }
+ } else {
+ Rdx = emitReduction(Vec, Builder, &TTI, DestTy);
+ }
+ if (Rdx->getType() != DestTy)
+ Rdx = Builder.CreateIntCast(Rdx, DestTy, IsSigned);
// Improved analysis for add/fadd/xor reductions with same scale
// factor for all operands of reductions. We can emit scalar ops for
// them instead.
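To make the same-scale shortcut mentioned in the comment above concrete, here is a minimal plain-C++ sketch of the add case with invented data (the real handling lives elsewhere in SLPVectorizer.cpp): when every reduced value occurs Scale times, reducing the duplicated operands equals multiplying the reduction of the unique operands by Scale, so a single scalar multiply can replace the wider reduction.

#include <cstdio>

// Sum up N values: stands in for a reduce.add over a vector.
static int reduceAdd(const int *Vals, unsigned N) {
  int Sum = 0;
  for (unsigned I = 0; I < N; ++I)
    Sum += Vals[I];
  return Sum;
}

int main() {
  const int Unique[2] = {3, 5};    // unique reduced values
  const int Dup[4] = {3, 3, 5, 5}; // same values, Scale = 2 copies each
  const int Scale = 2;
  // Both sides print 16: the scalar multiply stands in for the
  // reduction over the duplicated operands.
  std::printf("%d == %d\n", reduceAdd(Dup, 4),
              Scale * reduceAdd(Unique, 2));
  return 0;
}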
@@ -22432,30 +22421,32 @@ private:
case RecurKind::FMul: {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
if (!AllConsts) {
- if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
- assert(SLPReVec && "FixedVectorType is not expected.");
- unsigned ScalarTyNumElements = VecTy->getNumElements();
- for (unsigned I : seq<unsigned>(ReducedVals.size())) {
- VectorCost += TTI->getShuffleCost(
- TTI::SK_PermuteSingleSrc, VectorTy,
- createStrideMask(I, ScalarTyNumElements, ReducedVals.size()));
- VectorCost += TTI->getArithmeticReductionCost(RdxOpcode, VecTy, FMF,
- CostKind);
- }
- VectorCost += TTI->getScalarizationOverhead(
- VecTy, APInt::getAllOnes(ScalarTyNumElements), /*Insert*/ true,
- /*Extract*/ false, TTI::TCK_RecipThroughput);
- } else if (DoesRequireReductionOp) {
- Type *RedTy = VectorTy->getElementType();
- auto [RType, IsSigned] = R.getRootNodeTypeWithNoCast().value_or(
- std::make_pair(RedTy, true));
- if (RType == RedTy) {
- VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy,
- FMF, CostKind);
+ if (DoesRequireReductionOp) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
+ assert(SLPReVec && "FixedVectorType is not expected.");
+ unsigned ScalarTyNumElements = VecTy->getNumElements();
+ for (unsigned I : seq<unsigned>(ReducedVals.size())) {
+ VectorCost += TTI->getShuffleCost(
+ TTI::SK_PermuteSingleSrc, VectorTy,
+ createStrideMask(I, ScalarTyNumElements, ReducedVals.size()));
+ VectorCost += TTI->getArithmeticReductionCost(RdxOpcode, VecTy,
+ FMF, CostKind);
+ }
+ VectorCost += TTI->getScalarizationOverhead(
+ VecTy, APInt::getAllOnes(ScalarTyNumElements), /*Insert*/ true,
+ /*Extract*/ false, TTI::TCK_RecipThroughput);
} else {
- VectorCost = TTI->getExtendedReductionCost(
- RdxOpcode, !IsSigned, RedTy, getWidenedType(RType, ReduxWidth),
- FMF, CostKind);
+ Type *RedTy = VectorTy->getElementType();
+ auto [RType, IsSigned] = R.getRootNodeTypeWithNoCast().value_or(
+ std::make_pair(RedTy, true));
+ if (RType == RedTy) {
+ VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy,
+ FMF, CostKind);
+ } else {
+ VectorCost = TTI->getExtendedReductionCost(
+ RdxOpcode, !IsSigned, RedTy,
+ getWidenedType(RType, ReduxWidth), FMF, CostKind);
+ }
}
} else {
Type *RedTy = VectorTy->getElementType();
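As a compact summary of the hunk above, a minimal compilable C++ sketch (hypothetical enum and flag names, not the real SLP types) of the re-nested control flow: DoesRequireReductionOp is now the outermost condition, so the REVEC per-lane costing only runs when a reduction op is actually required, mirroring the codegen change in createSingleOp.

#include <cstdio>

// Hypothetical summary of the branch structure after this hunk.
enum class CostPath {
  PerLaneRevec,      // per-lane shuffle + reduction costs + insert overhead
  PlainReduction,    // getArithmeticReductionCost
  ExtendedReduction, // getExtendedReductionCost (root type != reduction type)
  ScalarOps          // no reduction op required
};

static CostPath pickCostPath(bool DoesRequireReductionOp,
                             bool IsRevecFixedVectorTy,
                             bool RootTypeMatchesRedTy) {
  if (DoesRequireReductionOp) {
    if (IsRevecFixedVectorTy)
      return CostPath::PerLaneRevec;
    return RootTypeMatchesRedTy ? CostPath::PlainReduction
                                : CostPath::ExtendedReduction;
  }
  return CostPath::ScalarOps;
}

int main() {
  // REVEC costing is now skipped when no reduction op is required:
  // prints 3 (ScalarOps) even though the REVEC flag is set.
  std::printf("%d\n", static_cast<int>(pickCostPath(false, true, true)));
  return 0;
}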