diff options
author | Luke Lau <luke@igalia.com> | 2025-09-05 08:23:47 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-05 00:23:47 +0000 |
commit | 4e5e65e55dc5a5ffc6e3db321df7f118e1c42d38 (patch) | |
tree | d9fc9d1a5766db96fe0239dcb3de3ebaa457ee38 /llvm/lib | |
parent | 53efe0a892199570aef9823cf0b1bc50bf4d480b (diff) | |
download | llvm-4e5e65e55dc5a5ffc6e3db321df7f118e1c42d38.zip llvm-4e5e65e55dc5a5ffc6e3db321df7f118e1c42d38.tar.gz llvm-4e5e65e55dc5a5ffc6e3db321df7f118e1c42d38.tar.bz2 |
[VPlan] Only compute reg pressure if considered. NFCI (#156923)
In #149056, VF pruning was changed so that it only prunes VFs that
stem from MaxBandwidth being enabled.
However, we always compute register pressure regardless of whether
max bandwidth is permitted for any VF (via
`MaxPermissibleVFWithoutMaxBW`).
This patch skips the computation when it is not needed and renames
`shouldCalculateRegPressureForVF` to `shouldConsiderRegPressureForVF` for
clarity.
The diff in reg-usage.ll is due to the scalable VPlan not actually
having any max-bandwidth VFs, so I've changed the test to check the
fixed-length VF instead, which is affected by max bandwidth.
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 143f784..5a1d1e7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -928,8 +928,8 @@ public: /// user options, for the given register kind. bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind); - /// \return True if register pressure should be calculated for the given VF. - bool shouldCalculateRegPressureForVF(ElementCount VF); + /// \return True if register pressure should be considered for the given VF. + bool shouldConsiderRegPressureForVF(ElementCount VF); /// \return The size (in bits) of the smallest and widest types in the code /// that needs to be vectorized. We ignore values that remain scalar such as @@ -3700,7 +3700,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return FixedScalableVFPair::getNone(); } -bool LoopVectorizationCostModel::shouldCalculateRegPressureForVF( +bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF( ElementCount VF) { if (!useMaxBandwidth(VF.isScalable() ? TargetTransformInfo::RGK_ScalableVector @@ -4147,8 +4147,9 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { P->vectorFactors().end()); SmallVector<VPRegisterUsage, 8> RUs; - if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) || - CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector)) + if (any_of(VFs, [this](ElementCount VF) { + return CM.shouldConsiderRegPressureForVF(VF); + })) RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore); for (unsigned I = 0; I < VFs.size(); I++) { @@ -4160,7 +4161,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { /// If the register pressure needs to be considered for VF, /// don't consider the VF as valid if it exceeds the number /// of registers for the target. 
- if (CM.shouldCalculateRegPressureForVF(VF) && + if (CM.shouldConsiderRegPressureForVF(VF) && RUs[I].exceedsMaxNumRegs(TTI, ForceTargetNumVectorRegs)) continue; @@ -6996,8 +6997,9 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { P->vectorFactors().end()); SmallVector<VPRegisterUsage, 8> RUs; - if (CM.useMaxBandwidth(TargetTransformInfo::RGK_ScalableVector) || - CM.useMaxBandwidth(TargetTransformInfo::RGK_FixedWidthVector)) + if (any_of(VFs, [this](ElementCount VF) { + return CM.shouldConsiderRegPressureForVF(VF); + })) RUs = calculateRegisterUsageForPlan(*P, VFs, TTI, CM.ValuesToIgnore); for (unsigned I = 0; I < VFs.size(); I++) { @@ -7023,7 +7025,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { InstructionCost Cost = cost(*P, VF); VectorizationFactor CurrentFactor(VF, Cost, ScalarCost); - if (CM.shouldCalculateRegPressureForVF(VF) && + if (CM.shouldConsiderRegPressureForVF(VF) && RUs[I].exceedsMaxNumRegs(TTI, ForceTargetNumVectorRegs)) { LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width " << VF << " because it uses too many registers\n"); |