aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/CMakeLists.txt1
-rw-r--r--llvm/lib/CodeGen/ExpandVectorPredication.cpp469
-rw-r--r--llvm/lib/CodeGen/TargetPassConfig.cpp5
3 files changed, 475 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 1b5b537..58afed8 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_component_library(LLVMCodeGen
ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
+ ExpandVectorPredication.cpp
FaultMaps.cpp
FEntryInserter.cpp
FinalizeISel.cpp
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
new file mode 100644
index 0000000..9f34eb0
--- /dev/null
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -0,0 +1,469 @@
+//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for vector predication intrinsics, allowing
+// targets to enable vector predication until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandVectorPredication.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+using VPLegalization = TargetTransformInfo::VPLegalization;
+using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
+
+// X-macro list of the transform names (Legal, Discard, Convert). Keep this in sync with TargetTransformInfo::VPLegalization.
+#define VPINTERNAL_VPLEGAL_CASES \
+ VPINTERNAL_CASE(Legal) \
+ VPINTERNAL_CASE(Discard) \
+ VPINTERNAL_CASE(Convert)
+
+#define VPINTERNAL_CASE(X) "|" #X
+
+// Hidden override options (testing only). With the definition above, the help text reads "Options: <empty>|Legal|Discard|Convert".
+static cl::opt<std::string> EVLTransformOverride( // Parsed into EVLParamStrategy by getVPLegalizationStrategy().
+ "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
+ cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
+ ". If non-empty, ignore "
+ "TargetTransformInfo and "
+ "always use this transformation for the %evl parameter (Used in "
+ "testing)."));
+
+static cl::opt<std::string> MaskTransformOverride( // Parsed into OpStrategy by getVPLegalizationStrategy().
+ "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
+ cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
+ ". If non-empty, Ignore "
+ "TargetTransformInfo and "
+ "always use this transformation for the %mask parameter (Used in "
+ "testing)."));
+
+#undef VPINTERNAL_CASE // Redefined below so the same list expands to StringSwitch cases.
+#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
+
+/// Translate the textual override \p TextOpt into a VPTransform by matching it
+/// against the names listed in VPINTERNAL_VPLEGAL_CASES. The switch has no
+/// default case, so \p TextOpt must be one of the listed spellings.
+static VPTransform parseOverrideOption(const std::string &TextOpt) {
+  StringSwitch<VPTransform> Parser(TextOpt);
+  return Parser VPINTERNAL_VPLEGAL_CASES;
+}
+
+#undef VPINTERNAL_VPLEGAL_CASES
+
+// \returns true if at least one of the two override options is non-empty.
+static bool anyExpandVPOverridesSet() {
+  if (!EVLTransformOverride.empty())
+    return true;
+  return !MaskTransformOverride.empty();
+}
+
+#define DEBUG_TYPE "expandvp"
+
+STATISTIC(NumFoldedVL, "Number of folded vector length params"); // Bumped after foldEVLIntoMask() in expandVectorPredication().
+STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); // Bumped per expandPredication() call. NOTE(review): text says "folded" but the counter tracks lowered ops.
+
+///// Helpers {
+
+/// \returns true iff \p MaskVal is a ConstantVector for which
+/// isAllOnesValue() holds. Any other kind of value yields false.
+static bool isAllTrueMask(Value *MaskVal) {
+  if (auto *MaskConst = dyn_cast<ConstantVector>(MaskVal))
+    return MaskConst->isAllOnesValue();
+  return false;
+}
+
+/// \returns The constant 1 of the integer (or integer vector) type \p DivTy,
+/// used as a divisor that cannot trap.
+static Constant *getSafeDivisor(Type *DivTy) {
+  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
+  Constant *One = ConstantInt::get(DivTy, 1u, /*isSigned=*/false);
+  return One;
+}
+
+/// Copy the fast-math flags of \p VPI onto \p NewVal.
+/// This is a no-op unless \p NewVal is an instruction that is an
+/// FPMathOperator and \p VPI is an FPMathOperator as well.
+static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
+  if (auto *NewInst = dyn_cast<Instruction>(&NewVal)) {
+    if (!isa<FPMathOperator>(NewVal))
+      return;
+
+    if (auto *OldFPOp = dyn_cast<FPMathOperator>(&VPI))
+      NewInst->setFastMathFlags(OldFPOp->getFastMathFlags());
+  }
+}
+
+/// Substitute \p NewOp for \p OldOp: carry over the decorations, redirect every
+/// use of \p OldOp to \p NewOp, and finally erase \p OldOp from its parent.
+/// \p OldOp must not be touched after this call.
+static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
+  Value *Replacement = &NewOp;
+  transferDecorations(*Replacement, OldOp);
+  OldOp.replaceAllUsesWith(Replacement);
+  OldOp.eraseFromParent();
+}
+
+//// } Helpers
+
+namespace {
+
+// Per-function state of the VP expansion: the function to rewrite, its
+// TargetTransformInfo, and whether the testing overrides are in effect.
+struct CachingVPExpander {
+  Function &F;
+  const TargetTransformInfo &TTI;
+  bool UsingTTIOverrides;
+
+  /// \returns The fixed-length constant vector <0, 1, ..., NumElems-1>.
+  /// \p Builder
+  ///    Used for instruction creation.
+  /// \p LaneTy
+  ///    Integer element type of the result vector.
+  /// \p NumElems
+  ///    Number of vector elements.
+  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
+                          unsigned NumElems);
+
+  /// \returns A mask that is true exactly on the lanes whose position is
+  /// less-than \p EVLParam.
+  ///
+  /// \p Builder
+  ///    Used for instruction creation.
+  /// \p EVLParam
+  ///    The explicit vector length parameter to test against the lane
+  ///    positions.
+  /// \p ElemCount
+  ///    Static (potentially scalable) number of vector elements.
+  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
+                          ElementCount ElemCount);
+
+  /// AND the lanes enabled by the %evl parameter of \p VPI into its %mask
+  /// parameter and then discard %evl. \returns \p VPI.
+  Value *foldEVLIntoMask(VPIntrinsic &VPI);
+
+  /// "Remove" the %evl parameter of \p VPI by setting it to the static vector
+  /// length of the operation.
+  void discardEVLParameter(VPIntrinsic &VPI);
+
+  /// Lower the VP binary operator \p VPI to an unpredicated binary operator.
+  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
+                                           VPIntrinsic &VPI);
+
+  /// Expand the vector predication in \p VPI into unpredicated IR where an
+  /// expansion is implemented; otherwise \p VPI is returned untouched.
+  Value *expandPredication(VPIntrinsic &VPI);
+
+  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. The
+  /// TTI answer is overridden by the cl::opts listed at the top of this file.
+  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
+
+public:
+  CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
+      : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
+
+  /// Run the expansion over all VP intrinsics in F.
+  /// \returns true if any intrinsic was put on the worklist and transformed.
+  bool expandVectorPredication();
+};
+
+//// CachingVPExpander {
+
+/// Materialize the constant vector <0, 1, ..., NumElems-1> with lanes of type
+/// \p LaneTy. \p Builder is currently unused because the result is a constant.
+Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
+                                           unsigned NumElems) {
+  // TODO add caching
+  SmallVector<Constant *, 16> Lanes;
+
+  unsigned Lane = 0;
+  while (Lane != NumElems) {
+    Lanes.push_back(ConstantInt::get(LaneTy, Lane, false));
+    ++Lane;
+  }
+
+  return ConstantVector::get(Lanes);
+}
+
+/// Turn \p EVLParam into a lane mask that is true for lane positions below
+/// \p EVLParam. Fixed-length vectors compare a step vector against a splat of
+/// \p EVLParam; scalable vectors call llvm.get.active.lane.mask instead.
+Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
+                                           Value *EVLParam,
+                                           ElementCount ElemCount) {
+  // TODO add caching
+  if (!ElemCount.isScalable()) {
+    // Fixed vector %evl conversion: (<0, 1, ...> u< splat(%evl)).
+    unsigned NumElems = ElemCount.getFixedValue();
+    Type *LaneTy = EVLParam->getType();
+    Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
+    Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
+    return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
+  }
+
+  // Scalable vector %evl conversion.
+  Module *M = Builder.GetInsertBlock()->getModule();
+  Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
+  Function *ActiveMaskFunc = Intrinsic::getDeclaration(
+      M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
+  // `get_active_lane_mask` performs an implicit less-than comparison, so the
+  // range starts at zero and ends at %evl.
+  Value *ConstZero = Builder.getInt32(0);
+  return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
+}
+
+/// Replace the VP binary operator \p VPI with the plain binary operator of the
+/// same functional opcode, emitted through \p Builder. For integer division
+/// and remainder, the divisor on masked-off lanes is replaced by a safe
+/// constant. \p VPI is erased; the new instruction is returned.
+Value *
+CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
+                                                     VPIntrinsic &VPI) {
+  assert((isSafeToSpeculativelyExecute(&VPI) ||
+          VPI.canIgnoreVectorLengthParam()) &&
+         "Implicitly dropping %evl in non-speculatable operator!");
+
+  auto OC = static_cast<Instruction::BinaryOps>(VPI.getFunctionalOpcode());
+  assert(Instruction::isBinaryOp(OC));
+
+  Value *LHS = VPI.getOperand(0);
+  Value *RHS = VPI.getOperand(1);
+
+  // Division operators need a safe divisor on masked-off lanes (1); for every
+  // other opcode the predicate can safely be ignored.
+  const bool IsDivOrRem = OC == Instruction::UDiv || OC == Instruction::SDiv ||
+                          OC == Instruction::URem || OC == Instruction::SRem;
+  if (IsDivOrRem) {
+    Value *Mask = VPI.getMaskParam();
+    if (Mask && !isAllTrueMask(Mask)) {
+      // 2nd operand must not be zero.
+      Value *SafeDivisor = getSafeDivisor(VPI.getType());
+      RHS = Builder.CreateSelect(Mask, RHS, SafeDivisor);
+    }
+  }
+
+  Value *NewBinOp = Builder.CreateBinOp(OC, LHS, RHS, VPI.getName());
+
+  replaceOperation(*NewBinOp, VPI);
+  return NewBinOp;
+}
+
+/// Neutralize the %evl parameter of \p VPI by overwriting it with the static
+/// vector length: an i32 constant for fixed-length vectors, or
+/// vscale * known-min-element-count (computed right before \p VPI) for
+/// scalable vectors. Does nothing when %evl is already ignorable or absent.
+void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
+  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
+
+  if (VPI.canIgnoreVectorLengthParam() || !VPI.getVectorLengthParam())
+    return;
+
+  ElementCount StaticElemCount = VPI.getStaticVectorLength();
+  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
+
+  if (!StaticElemCount.isScalable()) {
+    VPI.setVectorLengthParam(
+        ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false));
+    return;
+  }
+
+  // TODO add caching
+  Function *VScaleFunc =
+      Intrinsic::getDeclaration(VPI.getModule(), Intrinsic::vscale, Int32Ty);
+  IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
+  Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
+  Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
+  Value *MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
+                                    /*NUW*/ true, /*NSW*/ false);
+  VPI.setVectorLengthParam(MaxEVL);
+}
+
+/// Make the %evl parameter of \p VPI redundant by AND-ing the equivalent lane
+/// mask into the %mask parameter, then discard %evl.
+/// \returns \p VPI in every case, including when %evl was already ignorable.
+Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
+  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
+
+  // Ineffective %evl parameter and so nothing to do here.
+  if (VPI.canIgnoreVectorLengthParam())
+    return &VPI;
+
+  // Only VP intrinsics can have an %evl parameter.
+  Value *OldEVLParam = VPI.getVectorLengthParam();
+  Value *OldMaskParam = VPI.getMaskParam();
+  assert(OldMaskParam && "no mask param to fold the vl param into");
+  assert(OldEVLParam && "no EVL param to fold away");
+
+  LLVM_DEBUG({
+    dbgs() << "OLD evl: " << *OldEVLParam << '\n';
+    dbgs() << "OLD mask: " << *OldMaskParam << '\n';
+  });
+
+  // Convert the %evl predication into vector mask predication; the new
+  // instructions are inserted in front of VPI.
+  IRBuilder<> Builder(&VPI);
+  Value *VLMask =
+      convertEVLToMask(Builder, OldEVLParam, VPI.getStaticVectorLength());
+  VPI.setMaskParam(Builder.CreateAnd(VLMask, OldMaskParam));
+
+  // Drop the %evl parameter.
+  discardEVLParameter(VPI);
+  assert(VPI.canIgnoreVectorLengthParam() &&
+         "transformation did not render the evl param ineffective!");
+
+  // Reassess the modified instruction.
+  return &VPI;
+}
+
+/// Lower \p VPI to unpredicated IR. Only intrinsics whose functional opcode is
+/// a binary operator are expanded; any other intrinsic is returned unchanged.
+Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
+  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
+
+  // Try lowering to a LLVM instruction first.
+  const unsigned OC = VPI.getFunctionalOpcode();
+  if (!Instruction::isBinaryOp(OC))
+    return &VPI;
+
+  IRBuilder<> Builder(&VPI);
+  return expandPredicationInBinaryOperator(Builder, VPI);
+}
+
+//// } CachingVPExpander
+
+/// Worklist entry: a VP intrinsic paired with the legalization strategy that
+/// still has to be applied to it.
+struct TransformJob {
+  VPIntrinsic *PI;
+  TargetTransformInfo::VPLegalization Strategy;
+
+  TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
+      : PI(PI), Strategy(InitStrat) {}
+
+  /// \returns true once the remaining strategy requires no further work.
+  bool isDone() const {
+    const bool NothingLeft = Strategy.shouldDoNothing();
+    return NothingLeft;
+  }
+};
+
+/// Adjust \p LegalizeStrat so that applying it to \p I keeps the program
+/// semantics intact:
+///  - Speculatable instructions that get converted simply discard %evl.
+///  - Non-speculatable instructions never discard %evl; if %evl would be
+///    discarded or the operation is converted, %evl is folded into %mask.
+void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
+  const bool ConvertsOp = LegalizeStrat.OpStrategy == VPLegalization::Convert;
+
+  // Speculatable instructions do not strictly need predication.
+  if (isSafeToSpeculativelyExecute(&I)) {
+    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
+    // No need to expand %evl into the %mask only to ignore that code.
+    if (ConvertsOp)
+      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
+    return;
+  }
+
+  // The predicating effect of %evl has to survive for this non-speculatable
+  // VP intrinsic.
+  const bool DiscardsEVL =
+      LegalizeStrat.EVLParamStrategy == VPLegalization::Discard;
+  if (DiscardsEVL || ConvertsOp)
+    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
+}
+
+/// Determine the legalization strategy for \p VPI.
+///
+/// The strategy reported by TTI is the baseline. When testing overrides are
+/// active, each non-empty override option replaces the matching component of
+/// that strategy (%evl handling resp. operation handling); an option that was
+/// left empty keeps the TTI choice for its component.
+VPLegalization
+CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
+  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
+  if (LLVM_LIKELY(!UsingTTIOverrides)) {
+    // No overrides - we are in production.
+    return VPStrat;
+  }
+
+  // Overrides set - we are in testing, the following does not need to be
+  // efficient.
+  // UsingTTIOverrides is true as soon as *either* option is given, so the
+  // other one may still be empty. parseOverrideOption() has no default case
+  // and must not be handed the empty string.
+  if (!EVLTransformOverride.empty())
+    VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
+  if (!MaskTransformOverride.empty())
+    VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
+  return VPStrat;
+}
+
+/// Expand llvm.vp.* intrinsics in F as requested by TTI (or the overrides).
+/// First collects every VP intrinsic that needs work together with its
+/// sanitized strategy, then transforms %evl and finally the operation itself.
+/// \returns true if the worklist was non-empty.
+bool CachingVPExpander::expandVectorPredication() {
+  SmallVector<TransformJob, 16> Worklist;
+
+  // Collect first, transform later: the transformation erases instructions.
+  for (Instruction &Inst : instructions(F)) {
+    if (auto *VPI = dyn_cast<VPIntrinsic>(&Inst)) {
+      VPLegalization Strat = getVPLegalizationStrategy(*VPI);
+      sanitizeStrategy(Inst, Strat);
+      if (Strat.shouldDoNothing())
+        continue;
+      Worklist.emplace_back(VPI, Strat);
+    }
+  }
+  if (Worklist.empty())
+    return false;
+
+  // Transform all VPIntrinsics on the worklist.
+  LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
+                    << " instructions ::::\n");
+  for (TransformJob Job : Worklist) {
+    // Step 1: transform the EVL parameter (Legal means leave it alone).
+    const VPTransform EVLStrat = Job.Strategy.EVLParamStrategy;
+    if (EVLStrat == VPLegalization::Discard) {
+      discardEVLParameter(*Job.PI);
+    } else if (EVLStrat == VPLegalization::Convert) {
+      if (foldEVLIntoMask(*Job.PI))
+        ++NumFoldedVL;
+    }
+    Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
+
+    // Step 2: replace with a non-predicated operation if requested.
+    const VPTransform OpStrat = Job.Strategy.OpStrategy;
+    if (OpStrat == VPLegalization::Discard)
+      llvm_unreachable("Invalid strategy for operators.");
+    if (OpStrat == VPLegalization::Convert) {
+      expandPredication(*Job.PI);
+      ++NumLoweredVPOps;
+    }
+    Job.Strategy.OpStrategy = VPLegalization::Legal;
+
+    assert(Job.isDone() && "incomplete transformation");
+  }
+
+  return true;
+}
+/// Legacy pass manager wrapper that runs CachingVPExpander on each function.
+class ExpandVectorPredication : public FunctionPass {
+public:
+  static char ID;
+
+  ExpandVectorPredication() : FunctionPass(ID) {
+    initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Expand the VP intrinsics of \p F using the TTI for that function.
+  /// \returns true if anything was transformed.
+  bool runOnFunction(Function &F) override {
+    auto &TTIWrapper = getAnalysis<TargetTransformInfoWrapperPass>();
+    const TargetTransformInfo &TTI = TTIWrapper.getTTI(F);
+    return CachingVPExpander(F, TTI).expandVectorPredication();
+  }
+
+  /// Requires TargetTransformInfo; declares the CFG as preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.setPreservesCFG();
+  }
+};
+} // namespace
+
+char ExpandVectorPredication::ID; // Definition of the static member handed to FunctionPass(ID) in the constructor.
+INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
+ "Expand vector predication intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) // Matches addRequired<> in getAnalysisUsage().
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) // NOTE(review): not requested in getAnalysisUsage() - confirm this dependency is needed.
+INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
+ "Expand vector predication intrinsics", false, false)
+
+/// Factory for the legacy pass manager: \returns a newly allocated
+/// ExpandVectorPredication pass.
+FunctionPass *llvm::createExpandVectorPredicationPass() {
+  FunctionPass *Pass = new ExpandVectorPredication();
+  return Pass;
+}
+
+/// New pass manager entry point: expand the VP intrinsics of \p F.
+/// \returns all analyses preserved if nothing changed, otherwise only the
+/// CFG analyses.
+PreservedAnalyses
+ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
+  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+  const bool Changed = CachingVPExpander(F, TTI).expandVectorPredication();
+  if (Changed) {
+    PreservedAnalyses PA;
+    PA.preserveSet<CFGAnalyses>();
+    return PA;
+  }
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 6e80235..b5e8f9e 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -864,6 +864,11 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());
+ // Expand vector predication intrinsics into standard IR instructions.
+ // This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
+ // passes since it emits those kinds of intrinsics.
+ addPass(createExpandVectorPredicationPass());
+
// Add scalarization of target's unsupported masked memory intrinsics pass.
// the unsupported intrinsic will be replaced with a chain of basic blocks,
// that stores/loads element one-by-one if the appropriate mask bit is set.