Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/IPO/Attributor.cpp                          |   3
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp                |  16
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp                           |   2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp            |  12
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h           |   2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp           |  26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp        |  63
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp        |   6
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp  |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp   |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp          |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp  |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp         |  44
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp    |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp      |  21
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp      |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp |  11
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp       |  12
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp         |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp           |   2
-rw-r--r--  llvm/lib/Transforms/Utils/PredicateInfo.cpp                     |   2
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp                       |  27
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                 |  17
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp                 |  28
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                           |  24
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp               | 166
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp                    |  57
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h                      |  54
28 files changed, 393 insertions, 222 deletions
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 077d29f..3b59ebb 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -272,6 +272,9 @@ AA::getInitialValueForObj(Attributor &A, const AbstractAttribute &QueryingAA,
}
if (RangePtr && !RangePtr->offsetOrSizeAreUnknown()) {
+ int64_t StorageSize = DL.getTypeStoreSize(&Ty);
+ if (StorageSize != RangePtr->Size)
+ return nullptr;
APInt Offset = APInt(64, RangePtr->Offset);
return ConstantFoldLoadFromConst(Initializer, &Ty, Offset, DL);
}
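An illustrative consequence of the new size guard (hypothetical IR, not part of the patch): given

    @g = global i32 42

a query whose range matches the full 4-byte store size of i32 still folds a load of @g to 42, but a range of size 2 now returns nullptr rather than folding a partial read.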
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 6d16599..5048561 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1044,15 +1044,13 @@ struct AAPointerInfoImpl
return AAPointerInfo::manifest(A);
}
- virtual const_bin_iterator begin() const override { return State::begin(); }
- virtual const_bin_iterator end() const override { return State::end(); }
- virtual int64_t numOffsetBins() const override {
- return State::numOffsetBins();
- }
- virtual bool reachesReturn() const override {
+ const_bin_iterator begin() const override { return State::begin(); }
+ const_bin_iterator end() const override { return State::end(); }
+ int64_t numOffsetBins() const override { return State::numOffsetBins(); }
+ bool reachesReturn() const override {
return !ReturnedOffsets.isUnassigned();
}
- virtual void addReturnedOffsetsTo(OffsetInfo &OI) const override {
+ void addReturnedOffsetsTo(OffsetInfo &OI) const override {
if (ReturnedOffsets.isUnknown()) {
OI.setUnknown();
return;
@@ -6653,7 +6651,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToStack(IRP, A) {}
- ~AAHeapToStackFunction() {
+ ~AAHeapToStackFunction() override {
// Ensure we call the destructor so we release any memory allocated in the
// sets.
for (auto &It : AllocationInfos)
@@ -8374,7 +8372,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
AccessKind2Accesses.fill(nullptr);
}
- ~AAMemoryLocationImpl() {
+ ~AAMemoryLocationImpl() override {
// The AccessSets are allocated via a BumpPtrAllocator, we call
// the destructor manually.
for (AccessSet *AS : AccessKind2Accesses)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5e2247f..d7eb745 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2693,7 +2693,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
- ~AAExecutionDomainFunction() { delete RPOT; }
+ ~AAExecutionDomainFunction() override { delete RPOT; }
void initialize(Attributor &A) override {
Function *F = getAnchorScope();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 669d4f0..8d9933b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
return BinaryOperator::CreateSub(ConstCtlz, X);
}
+
+ // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
+ if (Op0->hasOneUse() &&
+ match(Op0,
+ m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
+ Type *Ty = II.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
+ {X, IC.Builder.getFalse()});
+ auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
+ return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
+ }
}
// cttz(Pow2) -> Log2(Pow2)
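A sanity check on the new fold (illustrative IR, not part of the patch): ~x & (x - 1) keeps exactly the trailing-zero bits of x, so for x with t trailing zeros the operand equals 2^t - 1 and its leading-zero count is bitwidth - t.

    define i32 @src(i32 %x) {
      %m1  = add i32 %x, -1
      %not = xor i32 %x, -1
      %and = and i32 %not, %m1
      %r   = call i32 @llvm.ctlz.i32(i32 %and, i1 false)
      ret i32 %r
    }
    ; folds to:
    ;   %tz = call i32 @llvm.cttz.i32(i32 %x, i1 false)
    ;   %r  = sub i32 32, %tz
    ; e.g. x = 96 (0b1100000): ~x & (x - 1) = 31, ctlz(31) = 27 = 32 - cttz(96).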
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index ede73f8..9c75d9a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -72,7 +72,7 @@ public:
: InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI,
PSI, DL, RPOT) {}
- virtual ~InstCombinerImpl() = default;
+ ~InstCombinerImpl() override = default;
/// Perform early cleanup and prepare the InstCombine worklist.
bool prepareWorklist(Function &F);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5aa8de3..f5130da 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -4697,5 +4697,31 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(),
CondVal, FalseVal));
+ // Canonicalize sign function ashr pattern: select (icmp slt X, 1), ashr X,
+ // bitwidth-1, 1 -> scmp(X, 0)
+ // Also handles: select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0)
+ unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+ CmpPredicate Pred;
+ Value *CmpLHS, *CmpRHS;
+
+ // Canonicalize sign function ashr patterns:
+ // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0)
+ // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0)
+ if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
+ m_Value(TrueVal), m_Value(FalseVal))) &&
+ ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) &&
+ match(TrueVal,
+ m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) &&
+ match(FalseVal, m_One())) ||
+ (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) &&
+ match(TrueVal, m_One()) &&
+ match(FalseVal,
+ m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) {
+
+ Function *Scmp = Intrinsic::getOrInsertDeclaration(
+ SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()});
+ return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)});
+ }
+
return nullptr;
}
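For reference, the first of the two patterns in illustrative IR (i32 case):

    define i32 @sign(i32 %x) {
      %cmp  = icmp slt i32 %x, 1
      %ashr = ashr i32 %x, 31
      %r    = select i1 %cmp, i32 %ashr, i32 1
      ret i32 %r
    }
    ; canonicalizes to:
    ;   %r = call i32 @llvm.scmp.i32.i32(i32 %x, i32 0)
    ; x < 0 yields -1 via the ashr, x == 0 yields 0, and x > 0 yields 1,
    ; which is exactly scmp(x, 0).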
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 67e2aae..67f837c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2327,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
return ConstantVector::get(NewVecC);
}
+// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
+ Constant *Splat, bool SplatLHS,
+ const DataLayout &DL) {
+ ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
+ Constant *LHS = ConstantVector::getSplat(EC, Splat);
+ Constant *RHS = Vector;
+ if (!SplatLHS)
+ std::swap(LHS, RHS);
+ return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
if (!isa<VectorType>(Inst.getType()))
return nullptr;
@@ -2338,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
assert(cast<VectorType>(RHS->getType())->getElementCount() ==
cast<VectorType>(Inst.getType())->getElementCount());
+ auto foldConstantsThroughSubVectorInsertSplat =
+ [&](Value *MaybeSubVector, Value *MaybeSplat,
+ bool SplatLHS) -> Instruction * {
+ Value *Idx;
+ Constant *Splat, *SubVector, *Dest;
+ if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
+ !match(MaybeSubVector,
+ m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
+ m_Value(Idx))))
+ return nullptr;
+ SubVector =
+ constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
+ Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
+ if (!SubVector || !Dest)
+ return nullptr;
+ auto *InsertVector =
+ Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
+ return replaceInstUsesWith(Inst, InsertVector);
+ };
+
+ // If one operand is a constant splat and the other operand is a
+ // `vector.insert` where both the destination and subvector are constant,
+ // apply the operation to both the destination and subvector, returning a new
+ // constant `vector.insert`. This helps constant folding for scalable vectors.
+ if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+ /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
+ return Folded;
+ if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+ /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
+ return Folded;
+
// If both operands of the binop are vector concatenations, then perform the
// narrow binop on each pair of the source operands followed by concatenation
// of the results.
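A sketch of what this enables for scalable vectors (illustrative IR, assuming an nxv4i32 destination):

    %ins = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(
               <vscale x 4 x i32> zeroinitializer,
               <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
    %r   = add <vscale x 4 x i32> %ins, splat (i32 10)
    ; folds to a vector.insert of the two folded constants:
    ;   @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10),
    ;       <4 x i32> <i32 11, i32 12, i32 13, i32 14>, i64 0)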
@@ -3315,21 +3358,21 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (TyAllocSize == 1) {
// Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
- // but only if the result pointer is only used as if it were an integer,
- // or both point to the same underlying object (otherwise provenance is
- // not necessarily retained).
+ // but only if the result pointer is only used as if it were an integer.
+ // (The case where the underlying object is the same is handled by
+ // InstSimplify.)
Value *X = GEP.getPointerOperand();
Value *Y;
- if (match(GEP.getOperand(1),
- m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
+ if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
+ m_PtrToIntOrAddr(m_Specific(X)))) &&
GEPType == Y->getType()) {
- bool HasSameUnderlyingObject =
- getUnderlyingObject(X) == getUnderlyingObject(Y);
+ bool HasNonAddressBits =
+ DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
bool Changed = false;
GEP.replaceUsesWithIf(Y, [&](Use &U) {
- bool ShouldReplace = HasSameUnderlyingObject ||
- isa<ICmpInst>(U.getUser()) ||
- isa<PtrToIntInst>(U.getUser());
+ bool ShouldReplace = isa<PtrToAddrInst>(U.getUser()) ||
+ (!HasNonAddressBits &&
+ isa<ICmpInst, PtrToIntInst>(U.getUser()));
Changed |= ShouldReplace;
return ShouldReplace;
});
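Illustrative IR for the relaxed replacement (assuming an address space whose address width equals its pointer width, i.e. HasNonAddressBits is false):

    %ix  = ptrtoint ptr %x to i64
    %iy  = ptrtoint ptr %y to i64
    %off = sub i64 %iy, %ix
    %gep = getelementptr i8, ptr %x, i64 %off
    %c   = icmp eq ptr %gep, %z
    ; the icmp use of %gep can now be replaced with %y even when %x and %y
    ; point into different objects, since the comparison observes only the
    ; address bits; ptrtoaddr users are replaceable unconditionally.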
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cb6ca72..7c364f8 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1539,7 +1539,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
- // mutiple of the pointer alignment. Otherwise, the element alignment
+ // multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
@@ -2399,7 +2399,7 @@ void ModuleAddressSanitizer::instrumentGlobalsELF(
// Putting globals in a comdat changes the semantic and potentially cause
// false negative odr violations at link time. If odr indicators are used, we
- // keep the comdat sections, as link time odr violations will be dectected on
+ // keep the comdat sections, as link time odr violations will be detected on
// the odr indicator symbols.
bool UseComdatForGlobalsGC = UseOdrIndicator && !UniqueModuleId.empty();
@@ -3858,7 +3858,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
I->eraseFromParent();
}
- // Replace all uses of AddessReturnedByAlloca with NewAddressPtr.
+ // Replace all uses of AddressReturnedByAlloca with NewAddressPtr.
AI->replaceAllUsesWith(NewAddressPtr);
// We are done. Erase old alloca from parent.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 72e8e50..0688bc7 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -359,7 +359,7 @@ class CHR {
unsigned Count = 0;
// Find out how many times region R is cloned. Note that if the parent
// of R is cloned, R is also cloned, but R's clone count is not updated
- // from the clone of the parent. We need to accumlate all the counts
+ // from the clone of the parent. We need to accumulate all the counts
// from the ancestors to get the clone count.
while (R) {
Count += DuplicationCount[R];
@@ -1513,7 +1513,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
BI->swapSuccessors();
// Don't need to swap this in terms of
// TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
- // mean whehter the branch is likely go into the if-then rather than
+    // mean whether the branch is likely to go into the if-then rather than
// successor0/successor1 and because we can tell which edge is the then or
// the else one by comparing the destination to the region exit block.
continue;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index cf87e35..1e5946a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -83,7 +83,7 @@ static cl::opt<unsigned>
// ICP the candidate function even when only a declaration is present.
static cl::opt<bool> ICPAllowDecls(
"icp-allow-decls", cl::init(false), cl::Hidden,
- cl::desc("Promote the target candidate even when the defintion "
+ cl::desc("Promote the target candidate even when the definition "
" is not available"));
// ICP hot candidate functions only. When setting to false, non-cold functions
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 5e7548b..7795cce 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -139,7 +139,7 @@ cl::opt<bool> ConditionalCounterUpdate(
cl::init(false));
// If the option is not specified, the default behavior about whether
-// counter promotion is done depends on how instrumentaiton lowering
+// counter promotion is done depends on how instrumentation lowering
// pipeline is setup, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
@@ -1052,7 +1052,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
assert(It != ProfileDataMap.end() && It->second.DataVar &&
- "value profiling detected in function with no counter incerement");
+ "value profiling detected in function with no counter increment");
GlobalVariable *DataVar = It->second.DataVar;
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 3c0f185..05616d8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -490,7 +490,7 @@ void createProfileFileNameVar(Module &M) {
}
}
-// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
+// Set MemprofHistogramFlag as a Global variable in IR. This makes it accessible
// to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
const StringRef VarName(MemProfHistogramFlagVar);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6cbecb..471c6ec 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4);
static const Align kShadowTLSAlignment = Align(8);
// These constants must be kept in sync with the ones in msan.h.
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;
@@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ static bool isAArch64SVCount(Type *Ty) {
+ if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty))
+ return TTy->getName() == "aarch64.svcount";
+ return false;
+ }
+
+ // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
+ // 'target("aarch64.svcount")', but not e.g., <vscale x 4 x i32>.
+ static bool isScalableNonVectorType(Type *Ty) {
+ if (!isAArch64SVCount(Ty))
+ LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
+ << "\n");
+
+ return Ty->isScalableTy() && !isa<VectorType>(Ty);
+ }
+
void materializeChecks() {
#ifndef NDEBUG
// For assert below.
@@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
+ if (isScalableNonVectorType(OrigTy)) {
+ LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
+ << "\n");
+ return OrigTy;
+ }
+
uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
<< *OrigIns << "\n");
return;
}
-#ifndef NDEBUG
+
Type *ShadowTy = Shadow->getType();
+ if (isScalableNonVectorType(ShadowTy)) {
+ LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
+ << " before " << *OrigIns << "\n");
+ return;
+ }
+#ifndef NDEBUG
assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
"Can only insert checks for integer, vector, and aggregate shadow "
@@ -3107,7 +3136,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// If we don't instrument it and it gets inlined,
/// our interceptor will not kick in and we will lose the memmove.
/// If we instrument the call here, but it does not get inlined,
- /// we will memove the shadow twice: which is bad in case
+ /// we will memmove the shadow twice: which is bad in case
/// of overlapping regions. So, we simply lower the intrinsic to a call.
///
/// Similar situation exists for memcpy and memset.
@@ -4746,7 +4775,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// _mm_round_ps / _mm_round_ps.
// Similar to maybeHandleSimpleNomemIntrinsic except
- // the second argument is guranteed to be a constant integer.
+ // the second argument is guaranteed to be a constant integer.
void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
assert(I.getArgOperand(0)->getType() == I.getType());
assert(I.arg_size() == 2);
@@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// an extra "select". This results in much more compact IR.
// Sa = select Sb, poisoned, (select b, Sc, Sd)
Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+ } else if (isScalableNonVectorType(I.getType())) {
+ // This is intended to handle target("aarch64.svcount"), which can't be
+ // handled in the else branch because of incompatibility with CreateXor
+ // ("The supported LLVM operations on this type are limited to load,
+ // store, phi, select and alloca instructions").
+
+ // TODO: this currently underapproximates. Use Arm SVE EOR in the else
+ // branch as needed instead.
+ Sa1 = getCleanShadow(getShadowTy(I.getType()));
} else {
// Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
// If Sb (condition is poisoned), look for bits in c and d that are equal
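An illustrative case the new branch now handles (hypothetical IR):

    %r = select i1 %c, target("aarch64.svcount") %a, target("aarch64.svcount") %b

getShadowTy returns target("aarch64.svcount") itself for this type, and the poisoned-condition contribution is approximated with a clean shadow instead of the usual xor-based blend, which this type does not support.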
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index f5b6686..5f87ed6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -176,7 +176,7 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
assert(areAllBBsReachable(
F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
.getManager()) &&
- "Function has unreacheable basic blocks. The expectation was that "
+ "Function has unreachable basic blocks. The expectation was that "
"DCE was run before.");
auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 0a358d4..de7c169 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -253,7 +253,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Value *RealContext = nullptr;
StructType *ThisContextType = nullptr;
- Value *TheRootFuctionData = nullptr;
+ Value *TheRootFunctionData = nullptr;
Value *ExpectedCalleeTLSAddr = nullptr;
Value *CallsiteInfoTLSAddr = nullptr;
const bool HasMusttail = [&F]() {
@@ -283,7 +283,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Guid = Builder.getInt64(
AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue())));
// The type of the context of this function is now knowable since we have
- // NumCallsites and NumCounters. We delcare it here because it's more
+ // NumCallsites and NumCounters. We declare it here because it's more
// convenient - we have the Builder.
ThisContextType = StructType::get(
F.getContext(),
@@ -291,28 +291,27 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
ArrayType::get(Builder.getPtrTy(), NumCallsites)});
// Figure out which way we obtain the context object for this function -
// if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
- // former case, we also set TheRootFuctionData since we need to release it
- // at the end (plus it can be used to know if we have an entrypoint or a
- // regular function)
- // Don't set a name, they end up taking a lot of space and we don't need
- // them.
+ // former case, we also set TheRootFunctionData since we need to release
+ // it at the end (plus it can be used to know if we have an entrypoint or
+ // a regular function). Don't set a name, they end up taking a lot of
+ // space and we don't need them.
// Zero-initialize the FunctionData, except for functions that have
// musttail calls. There, we set the CtxRoot field to 1, which will be
// treated as a "can't be set as root".
- TheRootFuctionData = new GlobalVariable(
+ TheRootFunctionData = new GlobalVariable(
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
HasMusttail ? CannotBeRootInitializer
: Constant::getNullValue(FunctionDataTy));
if (ContextRootSet.contains(&F)) {
Context = Builder.CreateCall(
- StartCtx, {TheRootFuctionData, Guid, Builder.getInt32(NumCounters),
+ StartCtx, {TheRootFunctionData, Guid, Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit(
[&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
} else {
- Context = Builder.CreateCall(GetCtx, {TheRootFuctionData, &F, Guid,
+ Context = Builder.CreateCall(GetCtx, {TheRootFunctionData, &F, Guid,
Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit([&] {
@@ -399,7 +398,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
} else if (!HasMusttail && isa<ReturnInst>(I)) {
// Remember to release the context if we are an entrypoint.
IRBuilder<> Builder(&I);
- Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
+ Builder.CreateCall(ReleaseCtx, {TheRootFunctionData});
ContextWasReleased = true;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 120c4f6..71736cf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1957,7 +1957,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
function_ref<LoopInfo *(Function &)> LookupLI,
PGOInstrumentationType InstrumentationType) {
- // For the context-sensitve instrumentation, we should have a separated pass
+ // For the context-sensitive instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (InstrumentationType == PGOInstrumentationType::FDO)
createIRLevelProfileFlagVar(M, InstrumentationType);
@@ -2248,7 +2248,7 @@ static bool annotateAllFunctions(
Func.populateCoverage();
continue;
}
- // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
+ // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
// it means the profile for the function is unrepresentative and this
// function is actually hot / warm. We will reset the function hot / cold
// attribute and drop all the profile counters.
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index 4801ac7..210b126 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -481,15 +481,18 @@ StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
} // namespace
SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
- SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
- : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
+ SanitizerBinaryMetadataOptions Opts,
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+ ArrayRef<std::string> IgnorelistFiles)
+ : Options(std::move(Opts)),
+ VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()),
+ IgnorelistFiles(std::move(IgnorelistFiles)) {}
PreservedAnalyses
SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
std::unique_ptr<SpecialCaseList> Ignorelist;
if (!IgnorelistFiles.empty()) {
- Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
- *vfs::getRealFileSystem());
+ Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, *VFS);
if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index b74a070..09abf6a 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -318,6 +318,18 @@ private:
};
} // namespace
+SanitizerCoveragePass::SanitizerCoveragePass(
+ SanitizerCoverageOptions Options, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+ const std::vector<std::string> &AllowlistFiles,
+ const std::vector<std::string> &BlocklistFiles)
+ : Options(std::move(Options)),
+ VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()) {
+ if (AllowlistFiles.size() > 0)
+ Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, *this->VFS);
+ if (BlocklistFiles.size() > 0)
+ Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, *this->VFS);
+}
+
PreservedAnalyses SanitizerCoveragePass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 0d48a35..fd0e9f1 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -355,7 +355,7 @@ static bool isVtableAccess(Instruction *I) {
}
// Do not instrument known races/"benign races" that come from compiler
-// instrumentatin. The user has no way of suppressing them.
+// instrumentation. The user has no way of suppressing them.
static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
// Peel off GEPs and BitCasts.
Addr = Addr->stripInBoundsOffsets();
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 9471ae3..78d4a57e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -643,7 +643,7 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
// doesn't match, then we call the runtime (which may yet determine that
// the mismatch is okay).
//
- // The checks generated below have the following strucutre.
+ // The checks generated below have the following structure.
//
// ; First we load the descriptor for the load from shadow memory and
// ; compare it against the type descriptor for the current access type.
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 371d9e6..a9ab3b3 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -819,7 +819,7 @@ public:
OS << "]";
} else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
- << " Switch:" << *PS->Switch << " Edge: [";
+ << " Edge: [";
PS->From->printAsOperand(OS);
OS << ",";
PS->To->printAsOperand(OS);
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d831c27..c537be5c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7551,6 +7551,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *Condition = SI->getCondition();
@@ -7573,12 +7574,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
if (SI->getNumCases() < 4)
return false;
- // We perform this optimization only for switches with
- // unreachable default case.
- // This assumtion will save us from checking if `Condition` is a power of two.
- if (!SI->defaultDestUnreachable())
- return false;
-
// Check that switch cases are powers of two.
SmallVector<uint64_t, 4> Values;
for (const auto &Case : SI->cases()) {
@@ -7598,6 +7593,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
Builder.SetInsertPoint(SI);
+ if (!SI->defaultDestUnreachable()) {
+ // Let non-power-of-two inputs jump to the default case, when the latter is
+ // reachable.
+ auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
+ auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
+
+ auto *OrigBB = SI->getParent();
+ auto *DefaultCaseBB = SI->getDefaultDest();
+ BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
+ auto It = OrigBB->getTerminator()->getIterator();
+ BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ It->eraseFromParent();
+
+ addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
+ }
+
// Replace each case with its trailing zeros number.
for (auto &Case : SI->cases()) {
auto *OrigValue = Case.getCaseValue();
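An illustrative rewrite with a reachable default (hypothetical IR):

    switch i32 %x, label %default [
      i32 1, label %a
      i32 2, label %b
      i32 4, label %c
      i32 8, label %d
    ]
    ; becomes, guarded so that non-power-of-two inputs still reach %default:
    ;   %pop    = call i32 @llvm.ctpop.i32(i32 %x)
    ;   %ispow2 = icmp eq i32 %pop, 1
    ;   br i1 %ispow2, label %switch.split, label %default
    ; switch.split:                        ; the switch now keys on cttz(%x)
    ;   %tz = call i32 @llvm.cttz.i32(i32 %x, i1 true)
    ;   switch i32 %tz, label %default [
    ;     i32 0, label %a
    ;     i32 1, label %b
    ;     i32 2, label %c
    ;     i32 3, label %d
    ;   ]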
@@ -7953,7 +7966,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
Options.ConvertSwitchToLookupTable))
return requestResimplify();
- if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (reduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3356516..facb0fa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4378,8 +4378,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
const SCEV *TC =
vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
- RemainingIterations =
- SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+ const SCEV *KnownMinTC;
+ bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale()));
+ // Use versions of TC and VF in which both are either scalable or fixed.
+ if (ScalableTC == MainLoopVF.isScalable())
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+ else if (ScalableTC) {
+ const SCEV *EstimatedTC = SE.getMulExpr(
+ KnownMinTC,
+ SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
+ RemainingIterations = SE.getURemExpr(
+ EstimatedTC, SE.getElementCount(TCType, MainLoopVF * IC));
+ } else
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
// No iterations left to process in the epilogue.
if (RemainingIterations->isZero())
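A worked instance of the new mixed scalable/fixed case (assumed numbers, for illustration): with a scalable trip count TC = 8 * vscale, a fixed main-loop VF of 8, IC = 2, and a vscale-for-tuning of 2, the estimated trip count is 8 * 2 = 16, so RemainingIterations = 16 urem (8 * 2) = 0 and the epilogue is skipped.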
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cdb9e7e..4fcaf6d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17641,12 +17641,28 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars, [&](Value *V) {
- return isa<PoisonValue>(V) ||
- (E->Idx == 0 && isa<InsertElementInst>(V)) ||
- E->isCopyableElement(V) ||
- (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
- }))
+ (all_of(E->Scalars,
+ [&](Value *V) {
+ return isa<PoisonValue>(V) ||
+ (E->Idx == 0 && isa<InsertElementInst>(V)) ||
+ E->isCopyableElement(V) ||
+ (!isVectorLikeInstWithConstOps(V) &&
+ isUsedOutsideBlock(V));
+ }) &&
+ (!E->doesNotNeedToSchedule() ||
+ any_of(E->Scalars,
+ [&](Value *V) {
+ if (!isa<Instruction>(V) ||
+ (E->hasCopyableElements() && E->isCopyableElement(V)))
+ return false;
+ return !areAllOperandsNonInsts(V);
+ }) ||
+ none_of(E->Scalars, [&](Value *V) {
+ if (!isa<Instruction>(V) ||
+ (E->hasCopyableElements() && E->isCopyableElement(V)))
+ return false;
+ return MustGather.contains(V);
+ }))))
Res = FindLastInst();
else
Res = FindFirstInst();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a1ad2db..5b9f005 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -398,7 +398,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown())
: VPDef(SC), VPUser(Operands), DL(DL) {}
- virtual ~VPRecipeBase() = default;
+ ~VPRecipeBase() override = default;
/// Clone the current recipe.
virtual VPRecipeBase *clone() = 0;
@@ -576,7 +576,7 @@ public:
return R && classof(R);
}
- virtual VPSingleDefRecipe *clone() override = 0;
+ VPSingleDefRecipe *clone() override = 0;
/// Returns the underlying instruction.
Instruction *getUnderlyingInstr() {
@@ -907,7 +907,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}
- virtual VPRecipeWithIRFlags *clone() override = 0;
+ VPRecipeWithIRFlags *clone() override = 0;
static inline bool classof(const VPSingleDefRecipe *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
@@ -2068,7 +2068,7 @@ public:
return classof(static_cast<const VPRecipeBase *>(R));
}
- virtual void execute(VPTransformState &State) override = 0;
+ void execute(VPTransformState &State) override = 0;
/// Returns the step value of the induction.
VPValue *getStepValue() { return getOperand(1); }
@@ -2557,7 +2557,7 @@ public:
VPCostContext &Ctx) const override;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
+ bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
@@ -4172,11 +4172,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveIns;
- /// Mapping from SCEVs to the VPValues representing their expansions.
- /// NOTE: This mapping is temporary and will be removed once all users have
- /// been modeled in VPlan directly.
- DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
-
/// Blocks allocated and owned by the VPlan. They will be deleted once the
/// VPlan is destroyed.
SmallVector<VPBlockBase *> CreatedBlocks;
@@ -4424,15 +4419,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- VPValue *getSCEVExpansion(const SCEV *S) const {
- return SCEVToExpansion.lookup(S);
- }
-
- void addSCEVExpansion(const SCEV *S, VPValue *V) {
- assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
- SCEVToExpansion[S] = V;
- }
-
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3e85e6f..84817d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -943,12 +943,40 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
}
+/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
+/// Returns an optional pair, where the first element indicates whether it is
+/// an intrinsic ID.
+static std::optional<std::pair<bool, unsigned>>
+getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
+ return TypeSwitch<const VPSingleDefRecipe *,
+ std::optional<std::pair<bool, unsigned>>>(R)
+ .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+ VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
+ [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+ .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+ return std::make_pair(true, I->getVectorIntrinsicID());
+ })
+ .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
+ // For recipes that do not directly map to LLVM IR instructions,
+ // assign opcodes after the last VPInstruction opcode (which is also
+ // after the last IR Instruction opcode), based on the VPDefID.
+ return std::make_pair(false,
+ VPInstruction::OpsEnd + 1 + I->getVPDefID());
+ })
+ .Default([](auto *) { return std::nullopt; });
+}
+
/// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
-/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are
-/// foldable live-ins.
-static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
- ArrayRef<VPValue *> Operands,
- const DataLayout &DL, VPTypeAnalysis &TypeInfo) {
+/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p
+/// Operands are foldable live-ins.
+static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
+ ArrayRef<VPValue *> Operands,
+ const DataLayout &DL,
+ VPTypeAnalysis &TypeInfo) {
+ auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R);
+ if (!OpcodeOrIID)
+ return nullptr;
+
SmallVector<Value *, 4> Ops;
for (VPValue *Op : Operands) {
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
@@ -956,43 +984,57 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
Ops.push_back(Op->getLiveInIRValue());
}
- InstSimplifyFolder Folder(DL);
- if (Instruction::isBinaryOp(Opcode))
- return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
+ auto FoldToIRValue = [&]() -> Value * {
+ InstSimplifyFolder Folder(DL);
+ if (OpcodeOrIID->first) {
+ if (R.getNumOperands() != 2)
+ return nullptr;
+ unsigned ID = OpcodeOrIID->second;
+ return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
+ TypeInfo.inferScalarType(&R));
+ }
+ unsigned Opcode = OpcodeOrIID->second;
+ if (Instruction::isBinaryOp(Opcode))
+ return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
+ Ops[0], Ops[1]);
+ if (Instruction::isCast(Opcode))
+ return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+ TypeInfo.inferScalarType(R.getVPSingleValue()));
+ switch (Opcode) {
+ case VPInstruction::LogicalAnd:
+ return Folder.FoldSelect(Ops[0], Ops[1],
+ ConstantInt::getNullValue(Ops[1]->getType()));
+ case VPInstruction::Not:
+ return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+ Constant::getAllOnesValue(Ops[0]->getType()));
+ case Instruction::Select:
+ return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
Ops[1]);
- if (Instruction::isCast(Opcode))
- return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
- TypeInfo.inferScalarType(R.getVPSingleValue()));
- switch (Opcode) {
- case VPInstruction::LogicalAnd:
- return Folder.FoldSelect(Ops[0], Ops[1],
- ConstantInt::getNullValue(Ops[1]->getType()));
- case VPInstruction::Not:
- return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
- Constant::getAllOnesValue(Ops[0]->getType()));
- case Instruction::Select:
- return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
- case Instruction::ICmp:
- case Instruction::FCmp:
- return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
- Ops[1]);
- case Instruction::GetElementPtr: {
- auto &RFlags = cast<VPRecipeWithIRFlags>(R);
- auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
- return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
- RFlags.getGEPNoWrapFlags());
- }
- case VPInstruction::PtrAdd:
- case VPInstruction::WidePtrAdd:
- return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
- Ops[1],
- cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
- // An extract of a live-in is an extract of a broadcast, so return the
- // broadcasted element.
- case Instruction::ExtractElement:
- assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
- return Ops[0];
- }
+ case Instruction::GetElementPtr: {
+ auto &RFlags = cast<VPRecipeWithIRFlags>(R);
+ auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
+ return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+ drop_begin(Ops), RFlags.getGEPNoWrapFlags());
+ }
+ case VPInstruction::PtrAdd:
+ case VPInstruction::WidePtrAdd:
+ return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()),
+ Ops[0], Ops[1],
+ cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+ // An extract of a live-in is an extract of a broadcast, so return the
+ // broadcasted element.
+ case Instruction::ExtractElement:
+ assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
+ return Ops[0];
+ }
+ return nullptr;
+ };
+
+ if (Value *V = FoldToIRValue())
+ return R.getParent()->getPlan()->getOrAddLiveIn(V);
return nullptr;
}
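For example (illustrative, assuming both operands are live-in constants): a VPWidenIntrinsicRecipe computing llvm.umin over live-ins 3 and 5 now folds through FoldBinaryIntrinsic to the live-in constant 3; previously only recipes carrying plain instruction opcodes were folded.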
@@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// Simplification of live-in IR values for SingleDef recipes using
// InstSimplifyFolder.
- if (TypeSwitch<VPRecipeBase *, bool>(&R)
- .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
- VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) {
- const DataLayout &DL =
- Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
- Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL,
- TypeInfo);
- if (V)
- I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
- return V;
- })
- .Default([](auto *) { return false; }))
- return;
+ const DataLayout &DL =
+ Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
+ if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo))
+ return Def->replaceAllUsesWith(V);
// Fold PredPHI LiveIn -> LiveIn.
if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
@@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
}
- /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
- /// Returns an optional pair, where the first element indicates whether it is
- /// an intrinsic ID.
- static std::optional<std::pair<bool, unsigned>>
- getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
- return TypeSwitch<const VPSingleDefRecipe *,
- std::optional<std::pair<bool, unsigned>>>(R)
- .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
- VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
- [](auto *I) { return std::make_pair(false, I->getOpcode()); })
- .Case<VPWidenIntrinsicRecipe>([](auto *I) {
- return std::make_pair(true, I->getVectorIntrinsicID());
- })
- .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
- // For recipes that do not directly map to LLVM IR instructions,
- // assign opcodes after the last VPInstruction opcode (which is also
- // after the last IR Instruction opcode), based on the VPDefID.
- return std::make_pair(false,
- VPInstruction::OpsEnd + 1 + I->getVPDefID());
- })
- .Default([](auto *) { return std::nullopt; });
- }
-
/// If recipe \p R will lower to a GEP with a non-i8 source element type,
/// return that source element type.
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
@@ -4119,7 +4129,7 @@ static bool isAlreadyNarrow(VPValue *VPV) {
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth) {
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
- if (!VectorLoop)
+ if (!VectorLoop || VectorLoop->getEntry()->getNumSuccessors() != 0)
return;
VPTypeAnalysis TypeInfo(Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 06c3d75..4db92e7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
}
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
- if (auto *Expanded = Plan.getSCEVExpansion(Expr))
- return Expanded;
VPValue *Expanded = nullptr;
if (auto *E = dyn_cast<SCEVConstant>(Expr))
Expanded = Plan.getOrAddLiveIn(E->getValue());
@@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
}
}
- Plan.addSCEVExpansion(Expr, Expanded);
return Expanded;
}
@@ -92,6 +89,60 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}
+bool vputils::isSingleScalar(const VPValue *VPV) {
+ auto PreservesUniformity = [](unsigned Opcode) -> bool {
+ if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
+ return true;
+ switch (Opcode) {
+ case Instruction::GetElementPtr:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Select:
+ case VPInstruction::Not:
+ case VPInstruction::Broadcast:
+ case VPInstruction::PtrAdd:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ // A live-in must be uniform across the scope of VPlan.
+ if (VPV->isLiveIn())
+ return true;
+
+ if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
+ const VPRegionBlock *RegionOfR = Rep->getRegion();
+ // Don't consider recipes in replicate regions as uniform yet; their first
+ // lane cannot be accessed when executing the replicate region for other
+ // lanes.
+ if (RegionOfR && RegionOfR->isReplicator())
+ return false;
+ return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) &&
+ all_of(Rep->operands(), isSingleScalar));
+ }
+ if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe,
+ VPWidenSelectRecipe>(VPV))
+ return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
+ if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
+ return PreservesUniformity(WidenR->getOpcode()) &&
+ all_of(WidenR->operands(), isSingleScalar);
+ }
+ if (auto *VPI = dyn_cast<VPInstruction>(VPV))
+ return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
+ (PreservesUniformity(VPI->getOpcode()) &&
+ all_of(VPI->operands(), isSingleScalar));
+ if (isa<VPPartialReductionRecipe>(VPV))
+ return false;
+ if (isa<VPReductionRecipe>(VPV))
+ return true;
+ if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
+ return Expr->isSingleScalar();
+
+ // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
+ return isa<VPExpandSCEVRecipe>(VPV);
+}
+
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
// Live-ins are uniform.
if (V->isLiveIn())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 840a5b9..37cd413 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -41,59 +41,7 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
-inline bool isSingleScalar(const VPValue *VPV) {
- auto PreservesUniformity = [](unsigned Opcode) -> bool {
- if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
- return true;
- switch (Opcode) {
- case Instruction::GetElementPtr:
- case Instruction::ICmp:
- case Instruction::FCmp:
- case Instruction::Select:
- case VPInstruction::Not:
- case VPInstruction::Broadcast:
- case VPInstruction::PtrAdd:
- return true;
- default:
- return false;
- }
- };
-
- // A live-in must be uniform across the scope of VPlan.
- if (VPV->isLiveIn())
- return true;
-
- if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
- const VPRegionBlock *RegionOfR = Rep->getRegion();
- // Don't consider recipes in replicate regions as uniform yet; their first
- // lane cannot be accessed when executing the replicate region for other
- // lanes.
- if (RegionOfR && RegionOfR->isReplicator())
- return false;
- return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) &&
- all_of(Rep->operands(), isSingleScalar));
- }
- if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe,
- VPWidenSelectRecipe>(VPV))
- return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
- if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
- return PreservesUniformity(WidenR->getOpcode()) &&
- all_of(WidenR->operands(), isSingleScalar);
- }
- if (auto *VPI = dyn_cast<VPInstruction>(VPV))
- return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
- (PreservesUniformity(VPI->getOpcode()) &&
- all_of(VPI->operands(), isSingleScalar));
- if (isa<VPPartialReductionRecipe>(VPV))
- return false;
- if (isa<VPReductionRecipe>(VPV))
- return true;
- if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
- return Expr->isSingleScalar();
-
- // VPExpandSCEVRecipes must be placed in the entry and are alway uniform.
- return isa<VPExpandSCEVRecipe>(VPV);
-}
+bool isSingleScalar(const VPValue *VPV);
/// Return true if \p V is a header mask in \p Plan.
bool isHeaderMask(const VPValue *V, const VPlan &Plan);