Diffstat (limited to 'llvm/lib/Transforms')
28 files changed, 393 insertions(+), 222 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 077d29f..3b59ebb 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -272,6 +272,9 @@ AA::getInitialValueForObj(Attributor &A, const AbstractAttribute &QueryingAA, } if (RangePtr && !RangePtr->offsetOrSizeAreUnknown()) { + int64_t StorageSize = DL.getTypeStoreSize(&Ty); + if (StorageSize != RangePtr->Size) + return nullptr; APInt Offset = APInt(64, RangePtr->Offset); return ConstantFoldLoadFromConst(Initializer, &Ty, Offset, DL); } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 6d16599..5048561 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1044,15 +1044,13 @@ struct AAPointerInfoImpl return AAPointerInfo::manifest(A); } - virtual const_bin_iterator begin() const override { return State::begin(); } - virtual const_bin_iterator end() const override { return State::end(); } - virtual int64_t numOffsetBins() const override { - return State::numOffsetBins(); - } - virtual bool reachesReturn() const override { + const_bin_iterator begin() const override { return State::begin(); } + const_bin_iterator end() const override { return State::end(); } + int64_t numOffsetBins() const override { return State::numOffsetBins(); } + bool reachesReturn() const override { return !ReturnedOffsets.isUnassigned(); } - virtual void addReturnedOffsetsTo(OffsetInfo &OI) const override { + void addReturnedOffsetsTo(OffsetInfo &OI) const override { if (ReturnedOffsets.isUnknown()) { OI.setUnknown(); return; @@ -6653,7 +6651,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { AAHeapToStackFunction(const IRPosition &IRP, Attributor &A) : AAHeapToStack(IRP, A) {} - ~AAHeapToStackFunction() { + ~AAHeapToStackFunction() override { // Ensure we call the destructor so we release any memory allocated in the // sets. for (auto &It : AllocationInfos) @@ -8374,7 +8372,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { AccessKind2Accesses.fill(nullptr); } - ~AAMemoryLocationImpl() { + ~AAMemoryLocationImpl() override { // The AccessSets are allocated via a BumpPtrAllocator, we call // the destructor manually. 
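The Attributor.cpp hunk at the top of this diff adds a guard so that getInitialValueForObj only folds a load from a global initializer when the recorded access size (RangePtr->Size) matches DL.getTypeStoreSize of the loaded type; the underlying folder only checks that the offset and type fit, so a size mismatch would otherwise be folded as if it were a full-width load of that type. A rough standalone sketch of the folder call involved, using LLVM's public constant-folding API rather than the Attributor code itself (the values are made up):

  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cstdint>

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    DataLayout DL("");
    Type *I32 = Type::getInt32Ty(Ctx);
    uint32_t Vals[] = {0x11111111, 0x22222222};
    Constant *Init = ConstantDataArray::get(Ctx, Vals);
    // ConstantFoldLoadFromConst folds an i32 load at offset 0 (here to the
    // first array element); it has no notion of the original access size,
    // which is why the caller now compares RangePtr->Size against
    // DL.getTypeStoreSize before asking it to fold.
    Constant *C = ConstantFoldLoadFromConst(Init, I32, APInt(64, 0), DL);
    return C ? 0 : 1;
  }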
for (AccessSet *AS : AccessKind2Accesses) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 5e2247f..d7eb745 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2693,7 +2693,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) : AAExecutionDomain(IRP, A) {} - ~AAExecutionDomainFunction() { delete RPOT; } + ~AAExecutionDomainFunction() override { delete RPOT; } void initialize(Attributor &A) override { Function *F = getAnchorScope(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 669d4f0..8d9933b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); return BinaryOperator::CreateSub(ConstCtlz, X); } + + // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false) + if (Op0->hasOneUse() && + match(Op0, + m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { + Type *Ty = II.getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty, + {X, IC.Builder.getFalse()}); + auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); + return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); + } } // cttz(Pow2) -> Log2(Pow2) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index ede73f8..9c75d9a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -72,7 +72,7 @@ public: : InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI, PSI, DL, RPOT) {} - virtual ~InstCombinerImpl() = default; + ~InstCombinerImpl() override = default; /// Perform early cleanup and prepare the InstCombine worklist. 
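The new InstCombineCalls.cpp fold above rewrites ctlz(~x & (x - 1)) as bitwidth - cttz(x, false): ~x & (x - 1) isolates the trailing zero bits of x as a mask of trailing ones, so counting its leading zeros equals the bit width minus the trailing-zero count of x. A minimal standalone check of that identity, written against C++20's <bit> rather than the LLVM intrinsics (std::countl_zero and std::countr_zero return the bit width for a zero input, matching the is_zero_poison == false form used by the fold):

  #include <bit>
  #include <cassert>
  #include <cstdint>
  #include <limits>

  int main() {
    constexpr int BitWidth = std::numeric_limits<std::uint32_t>::digits;
    for (std::uint32_t X = 0; X < (1u << 16); ++X) {
      std::uint32_t Mask = ~X & (X - 1); // mask of X's trailing zero bits
      assert(std::countl_zero(Mask) == BitWidth - std::countr_zero(X));
    }
  }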
bool prepareWorklist(Function &F); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5aa8de3..f5130da 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -4697,5 +4697,31 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(), CondVal, FalseVal)); + // Canonicalize sign function ashr pattern: select (icmp slt X, 1), ashr X, + // bitwidth-1, 1 -> scmp(X, 0) + // Also handles: select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + unsigned BitWidth = SI.getType()->getScalarSizeInBits(); + CmpPredicate Pred; + Value *CmpLHS, *CmpRHS; + + // Canonicalize sign function ashr patterns: + // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0) + // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0) + if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)), + m_Value(TrueVal), m_Value(FalseVal))) && + ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) && + match(TrueVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) && + match(FalseVal, m_One())) || + (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) && + match(TrueVal, m_One()) && + match(FalseVal, + m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) { + + Function *Scmp = Intrinsic::getOrInsertDeclaration( + SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()}); + return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)}); + } + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 67e2aae..67f837c 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2327,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C, return ConstantVector::get(NewVecC); } +// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS). 
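The InstCombineSelect.cpp hunk above canonicalizes the sign-function idiom, a signed compare against 0 or 1 selecting between "ashr X, bitwidth-1" and the constant 1, into llvm.scmp(X, 0). A small source-level model of why the two forms agree (plain C++ with illustrative names; the shift relies on arithmetic right shift of signed values, which C++20 guarantees):

  #include <cassert>
  #include <cstdint>

  // The matched pattern: select (icmp slt X, 1), (ashr X, 31), 1.
  std::int32_t signViaAshr(std::int32_t X) { return X < 1 ? (X >> 31) : 1; }

  // The canonical form: scmp(X, 0), i.e. a -1 / 0 / +1 three-way compare.
  std::int32_t signViaScmp(std::int32_t X) { return (X > 0) - (X < 0); }

  int main() {
    for (std::int32_t X : {INT32_MIN, -1000, -1, 0, 1, 1000, INT32_MAX})
      assert(signViaAshr(X) == signViaScmp(X));
  }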
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, + Constant *Splat, bool SplatLHS, + const DataLayout &DL) { + ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount(); + Constant *LHS = ConstantVector::getSplat(EC, Splat); + Constant *RHS = Vector; + if (!SplatLHS) + std::swap(LHS, RHS); + return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL); +} + Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { if (!isa<VectorType>(Inst.getType())) return nullptr; @@ -2338,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { assert(cast<VectorType>(RHS->getType())->getElementCount() == cast<VectorType>(Inst.getType())->getElementCount()); + auto foldConstantsThroughSubVectorInsertSplat = + [&](Value *MaybeSubVector, Value *MaybeSplat, + bool SplatLHS) -> Instruction * { + Value *Idx; + Constant *Splat, *SubVector, *Dest; + if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) || + !match(MaybeSubVector, + m_VectorInsert(m_Constant(Dest), m_Constant(SubVector), + m_Value(Idx)))) + return nullptr; + SubVector = + constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL); + Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL); + if (!SubVector || !Dest) + return nullptr; + auto *InsertVector = + Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx); + return replaceInstUsesWith(Inst, InsertVector); + }; + + // If one operand is a constant splat and the other operand is a + // `vector.insert` where both the destination and subvector are constant, + // apply the operation to both the destination and subvector, returning a new + // constant `vector.insert`. This helps constant folding for scalable vectors. + if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat( + /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false)) + return Folded; + if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat( + /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true)) + return Folded; + // If both operands of the binop are vector concatenations, then perform the // narrow binop on each pair of the source operands followed by concatenation // of the results. @@ -3315,21 +3358,21 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (TyAllocSize == 1) { // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), - // but only if the result pointer is only used as if it were an integer, - // or both point to the same underlying object (otherwise provenance is - // not necessarily retained). + // but only if the result pointer is only used as if it were an integer. + // (The case where the underlying object is the same is handled by + // InstSimplify.) 
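The foldVectorBinop change above distributes a binop between a constant splat and a constant llvm.vector.insert over both the insert's destination and its subvector, which (per the added comment) helps constant folding for scalable vectors. The underlying fact is purely element-wise: applying the op before or after the insert touches each lane with the same scalar operation, so the results coincide. A tiny array-based model of that, with plain C++ arrays standing in for the constant vectors and illustrative names:

  #include <array>
  #include <cassert>

  int main() {
    std::array<int, 8> Dest{1, 2, 3, 4, 5, 6, 7, 8};
    std::array<int, 2> Sub{100, 200};
    const int Splat = 3, Idx = 4;

    // Insert the subvector, then apply "element + Splat" to the result.
    std::array<int, 8> A = Dest;
    for (int I = 0; I < 2; ++I)
      A[Idx + I] = Sub[I];
    for (int &V : A)
      V += Splat;

    // Apply "element + Splat" to Dest and Sub separately, then insert:
    // the shape the fold produces, with both operands still constant.
    std::array<int, 8> B = Dest;
    for (int &V : B)
      V += Splat;
    std::array<int, 2> SubPlus = Sub;
    for (int &V : SubPlus)
      V += Splat;
    for (int I = 0; I < 2; ++I)
      B[Idx + I] = SubPlus[I];

    assert(A == B);
  }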
Value *X = GEP.getPointerOperand(); Value *Y; - if (match(GEP.getOperand(1), - m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && + if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)), + m_PtrToIntOrAddr(m_Specific(X)))) && GEPType == Y->getType()) { - bool HasSameUnderlyingObject = - getUnderlyingObject(X) == getUnderlyingObject(Y); + bool HasNonAddressBits = + DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS); bool Changed = false; GEP.replaceUsesWithIf(Y, [&](Use &U) { - bool ShouldReplace = HasSameUnderlyingObject || - isa<ICmpInst>(U.getUser()) || - isa<PtrToIntInst>(U.getUser()); + bool ShouldReplace = isa<PtrToAddrInst>(U.getUser()) || + (!HasNonAddressBits && + isa<ICmpInst, PtrToIntInst>(U.getUser())); Changed |= ShouldReplace; return ShouldReplace; }); diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index cb6ca72..7c364f8 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1539,7 +1539,7 @@ void AddressSanitizer::getInterestingMemoryOperands( IID == Intrinsic::experimental_vp_strided_load) { Stride = VPI->getOperand(PtrOpNo + 1); // Use the pointer alignment as the element alignment if the stride is a - // mutiple of the pointer alignment. Otherwise, the element alignment + // multiple of the pointer alignment. Otherwise, the element alignment // should be Align(1). unsigned PointerAlign = Alignment.valueOrOne().value(); if (!isa<ConstantInt>(Stride) || @@ -2399,7 +2399,7 @@ void ModuleAddressSanitizer::instrumentGlobalsELF( // Putting globals in a comdat changes the semantic and potentially cause // false negative odr violations at link time. If odr indicators are used, we - // keep the comdat sections, as link time odr violations will be dectected on + // keep the comdat sections, as link time odr violations will be detected on // the odr indicator symbols. bool UseComdatForGlobalsGC = UseOdrIndicator && !UniqueModuleId.empty(); @@ -3858,7 +3858,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { I->eraseFromParent(); } - // Replace all uses of AddessReturnedByAlloca with NewAddressPtr. + // Replace all uses of AddressReturnedByAlloca with NewAddressPtr. AI->replaceAllUsesWith(NewAddressPtr); // We are done. Erase old alloca from parent. diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 72e8e50..0688bc7 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -359,7 +359,7 @@ class CHR { unsigned Count = 0; // Find out how many times region R is cloned. Note that if the parent // of R is cloned, R is also cloned, but R's clone count is not updated - // from the clone of the parent. We need to accumlate all the counts + // from the clone of the parent. We need to accumulate all the counts // from the ancestors to get the clone count. 
while (R) { Count += DuplicationCount[R]; @@ -1513,7 +1513,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, BI->swapSuccessors(); // Don't need to swap this in terms of // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based - // mean whehter the branch is likely go into the if-then rather than + // mean whether the branch is likely go into the if-then rather than // successor0/successor1 and because we can tell which edge is the then or // the else one by comparing the destination to the region exit block. continue; diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index cf87e35..1e5946a 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -83,7 +83,7 @@ static cl::opt<unsigned> // ICP the candidate function even when only a declaration is present. static cl::opt<bool> ICPAllowDecls( "icp-allow-decls", cl::init(false), cl::Hidden, - cl::desc("Promote the target candidate even when the defintion " + cl::desc("Promote the target candidate even when the definition " " is not available")); // ICP hot candidate functions only. When setting to false, non-cold functions diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 5e7548b..7795cce 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -139,7 +139,7 @@ cl::opt<bool> ConditionalCounterUpdate( cl::init(false)); // If the option is not specified, the default behavior about whether -// counter promotion is done depends on how instrumentaiton lowering +// counter promotion is done depends on how instrumentation lowering // pipeline is setup, i.e., the default value of true of this option // does not mean the promotion will be done by default. Explicitly // setting this option can override the default behavior. @@ -1052,7 +1052,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { GlobalVariable *Name = Ind->getName(); auto It = ProfileDataMap.find(Name); assert(It != ProfileDataMap.end() && It->second.DataVar && - "value profiling detected in function with no counter incerement"); + "value profiling detected in function with no counter increment"); GlobalVariable *DataVar = It->second.DataVar; uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp index 3c0f185..05616d8 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp @@ -490,7 +490,7 @@ void createProfileFileNameVar(Module &M) { } } -// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible +// Set MemprofHistogramFlag as a Global variable in IR. This makes it accessible // to the runtime, changing shadow count behavior. 
void createMemprofHistogramFlagVar(Module &M) { const StringRef VarName(MemProfHistogramFlagVar); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b6cbecb..471c6ec 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4); static const Align kShadowTLSAlignment = Align(8); // These constants must be kept in sync with the ones in msan.h. +// TODO: increase size to match SVE/SVE2/SME/SME2 limits static const unsigned kParamTLSSize = 800; static const unsigned kRetvalTLSSize = 800; @@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } } + static bool isAArch64SVCount(Type *Ty) { + if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty)) + return TTy->getName() == "aarch64.svcount"; + return false; + } + + // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka + // 'target("aarch64.svcount")', but not e.g., <vscale x 4 x i32>. + static bool isScalableNonVectorType(Type *Ty) { + if (!isAArch64SVCount(Ty)) + LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty + << "\n"); + + return Ty->isScalableTy() && !isa<VectorType>(Ty); + } + void materializeChecks() { #ifndef NDEBUG // For assert below. @@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } + if (isScalableNonVectorType(OrigTy)) { + LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy + << "\n"); + return OrigTy; + } + uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { << *OrigIns << "\n"); return; } -#ifndef NDEBUG + Type *ShadowTy = Shadow->getType(); + if (isScalableNonVectorType(ShadowTy)) { + LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow + << " before " << *OrigIns << "\n"); + return; + } +#ifndef NDEBUG assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) || isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) && "Can only insert checks for integer, vector, and aggregate shadow " @@ -3107,7 +3136,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// If we don't instrument it and it gets inlined, /// our interceptor will not kick in and we will lose the memmove. /// If we instrument the call here, but it does not get inlined, - /// we will memove the shadow twice: which is bad in case + /// we will memmove the shadow twice: which is bad in case /// of overlapping regions. So, we simply lower the intrinsic to a call. /// /// Similar situation exists for memcpy and memset. @@ -4746,7 +4775,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // _mm_round_ps / _mm_round_ps. // Similar to maybeHandleSimpleNomemIntrinsic except - // the second argument is guranteed to be a constant integer. + // the second argument is guaranteed to be a constant integer. void handleRoundPdPsIntrinsic(IntrinsicInst &I) { assert(I.getArgOperand(0)->getType() == I.getType()); assert(I.arg_size() == 2); @@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // an extra "select". This results in much more compact IR. 
// Sa = select Sb, poisoned, (select b, Sc, Sd) Sa1 = getPoisonedShadow(getShadowTy(I.getType())); + } else if (isScalableNonVectorType(I.getType())) { + // This is intended to handle target("aarch64.svcount"), which can't be + // handled in the else branch because of incompatibility with CreateXor + // ("The supported LLVM operations on this type are limited to load, + // store, phi, select and alloca instructions"). + + // TODO: this currently underapproximates. Use Arm SVE EOR in the else + // branch as needed instead. + Sa1 = getCleanShadow(getShadowTy(I.getType())); } else { // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ] // If Sb (condition is poisoned), look for bits in c and d that are equal diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index f5b6686..5f87ed6 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -176,7 +176,7 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M, assert(areAllBBsReachable( F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M) .getManager()) && - "Function has unreacheable basic blocks. The expectation was that " + "Function has unreachable basic blocks. The expectation was that " "DCE was run before."); auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F)); diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp index 0a358d4..de7c169 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp @@ -253,7 +253,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { Value *RealContext = nullptr; StructType *ThisContextType = nullptr; - Value *TheRootFuctionData = nullptr; + Value *TheRootFunctionData = nullptr; Value *ExpectedCalleeTLSAddr = nullptr; Value *CallsiteInfoTLSAddr = nullptr; const bool HasMusttail = [&F]() { @@ -283,7 +283,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { Guid = Builder.getInt64( AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue()))); // The type of the context of this function is now knowable since we have - // NumCallsites and NumCounters. We delcare it here because it's more + // NumCallsites and NumCounters. We declare it here because it's more // convenient - we have the Builder. ThisContextType = StructType::get( F.getContext(), @@ -291,28 +291,27 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { ArrayType::get(Builder.getPtrTy(), NumCallsites)}); // Figure out which way we obtain the context object for this function - // if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the - // former case, we also set TheRootFuctionData since we need to release it - // at the end (plus it can be used to know if we have an entrypoint or a - // regular function) - // Don't set a name, they end up taking a lot of space and we don't need - // them. + // former case, we also set TheRootFunctionData since we need to release + // it at the end (plus it can be used to know if we have an entrypoint or + // a regular function). Don't set a name, they end up taking a lot of + // space and we don't need them. // Zero-initialize the FunctionData, except for functions that have // musttail calls. There, we set the CtxRoot field to 1, which will be // treated as a "can't be set as root". 
- TheRootFuctionData = new GlobalVariable( + TheRootFunctionData = new GlobalVariable( M, FunctionDataTy, false, GlobalVariable::InternalLinkage, HasMusttail ? CannotBeRootInitializer : Constant::getNullValue(FunctionDataTy)); if (ContextRootSet.contains(&F)) { Context = Builder.CreateCall( - StartCtx, {TheRootFuctionData, Guid, Builder.getInt32(NumCounters), + StartCtx, {TheRootFunctionData, Guid, Builder.getInt32(NumCounters), Builder.getInt32(NumCallsites)}); ORE.emit( [&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); }); } else { - Context = Builder.CreateCall(GetCtx, {TheRootFuctionData, &F, Guid, + Context = Builder.CreateCall(GetCtx, {TheRootFunctionData, &F, Guid, Builder.getInt32(NumCounters), Builder.getInt32(NumCallsites)}); ORE.emit([&] { @@ -399,7 +398,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { } else if (!HasMusttail && isa<ReturnInst>(I)) { // Remember to release the context if we are an entrypoint. IRBuilder<> Builder(&I); - Builder.CreateCall(ReleaseCtx, {TheRootFuctionData}); + Builder.CreateCall(ReleaseCtx, {TheRootFunctionData}); ContextWasReleased = true; } } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 120c4f6..71736cf 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1957,7 +1957,7 @@ static bool InstrumentAllFunctions( function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, function_ref<LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType) { - // For the context-sensitve instrumentation, we should have a separated pass + // For the context-sensitive instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. if (InstrumentationType == PGOInstrumentationType::FDO) createIRLevelProfileFlagVar(M, InstrumentationType); @@ -2248,7 +2248,7 @@ static bool annotateAllFunctions( Func.populateCoverage(); continue; } - // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, + // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo, // it means the profile for the function is unrepresentative and this // function is actually hot / warm. We will reset the function hot / cold // attribute and drop all the profile counters. diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp index 4801ac7..210b126 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp @@ -481,15 +481,18 @@ StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { } // namespace SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( - SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles) - : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} + SanitizerBinaryMetadataOptions Opts, + IntrusiveRefCntPtr<vfs::FileSystem> VFS, + ArrayRef<std::string> IgnorelistFiles) + : Options(std::move(Opts)), + VFS(VFS ? 
std::move(VFS) : vfs::getRealFileSystem()), + IgnorelistFiles(std::move(IgnorelistFiles)) {} PreservedAnalyses SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { std::unique_ptr<SpecialCaseList> Ignorelist; if (!IgnorelistFiles.empty()) { - Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, - *vfs::getRealFileSystem()); + Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, *VFS); if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index b74a070..09abf6a 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -318,6 +318,18 @@ private: }; } // namespace +SanitizerCoveragePass::SanitizerCoveragePass( + SanitizerCoverageOptions Options, IntrusiveRefCntPtr<vfs::FileSystem> VFS, + const std::vector<std::string> &AllowlistFiles, + const std::vector<std::string> &BlocklistFiles) + : Options(std::move(Options)), + VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()) { + if (AllowlistFiles.size() > 0) + Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, *this->VFS); + if (BlocklistFiles.size() > 0) + Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, *this->VFS); +} + PreservedAnalyses SanitizerCoveragePass::run(Module &M, ModuleAnalysisManager &MAM) { auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 0d48a35..fd0e9f1 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -355,7 +355,7 @@ static bool isVtableAccess(Instruction *I) { } // Do not instrument known races/"benign races" that come from compiler -// instrumentatin. The user has no way of suppressing them. +// instrumentation. The user has no way of suppressing them. static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { // Peel off GEPs and BitCasts. Addr = Addr->stripInBoundsOffsets(); diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index 9471ae3..78d4a57e 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -643,7 +643,7 @@ bool TypeSanitizer::instrumentWithShadowUpdate( // doesn't match, then we call the runtime (which may yet determine that // the mismatch is okay). // - // The checks generated below have the following strucutre. + // The checks generated below have the following structure. // // ; First we load the descriptor for the load from shadow memory and // ; compare it against the type descriptor for the current access type. 
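The SanitizerBinaryMetadata and SanitizerCoverage changes above thread an IntrusiveRefCntPtr<vfs::FileSystem> through the pass constructors, defaulting to vfs::getRealFileSystem() when none is supplied, so ignore/allow/block lists can be resolved through an injected virtual file system. A hedged sketch of how a caller might now hand the metadata pass an in-memory ignorelist; the path and list contents are made up, and the "[metadata] src:" section mirrors the inSection("metadata", "src", ...) query the pass performs:

  #include "llvm/ADT/IntrusiveRefCntPtr.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/VirtualFileSystem.h"
  #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
  #include <string>
  #include <vector>

  using namespace llvm;

  ModulePassManager buildPipeline() {
    auto FS = makeIntrusiveRefCnt<vfs::InMemoryFileSystem>();
    FS->addFile("/ignorelists/meta.txt", /*ModificationTime=*/0,
                MemoryBuffer::getMemBuffer("[metadata]\nsrc:*generated*\n"));

    // Static storage so the list outlives the pass regardless of whether the
    // pass copies it or keeps a reference into it.
    static const std::vector<std::string> Ignorelists = {
        "/ignorelists/meta.txt"};

    ModulePassManager MPM;
    MPM.addPass(SanitizerBinaryMetadataPass(SanitizerBinaryMetadataOptions(),
                                            FS, Ignorelists));
    return MPM;
  }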
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 371d9e6..a9ab3b3 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -819,7 +819,7 @@ public: OS << "]"; } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) { OS << "; switch predicate info { CaseValue: " << *PS->CaseValue - << " Switch:" << *PS->Switch << " Edge: ["; + << " Edge: ["; PS->From->printAsOperand(OS); OS << ","; PS->To->printAsOperand(OS); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index d831c27..c537be5c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7551,6 +7551,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, /// log2(C)-indexed value table (instead of traditionally emitting a load of the /// address of the jump target, and indirectly jump to it). static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *Condition = SI->getCondition(); @@ -7573,12 +7574,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, if (SI->getNumCases() < 4) return false; - // We perform this optimization only for switches with - // unreachable default case. - // This assumtion will save us from checking if `Condition` is a power of two. - if (!SI->defaultDestUnreachable()) - return false; - // Check that switch cases are powers of two. SmallVector<uint64_t, 4> Values; for (const auto &Case : SI->cases()) { @@ -7598,6 +7593,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, Builder.SetInsertPoint(SI); + if (!SI->defaultDestUnreachable()) { + // Let non-power-of-two inputs jump to the default case, when the latter is + // reachable. + auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition); + auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1)); + + auto *OrigBB = SI->getParent(); + auto *DefaultCaseBB = SI->getDefaultDest(); + BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU); + auto It = OrigBB->getTerminator()->getIterator(); + BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It); + It->eraseFromParent(); + + addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}}); + } + // Replace each case with its trailing zeros number. 
for (auto &Case : SI->cases()) { auto *OrigValue = Case.getCaseValue(); @@ -7953,7 +7966,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { Options.ConvertSwitchToLookupTable)) return requestResimplify(); - if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) + if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (reduceSwitchRange(SI, Builder, DL, TTI)) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3356516..facb0fa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4378,8 +4378,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor( const SCEV *TC = vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE); assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable"); - RemainingIterations = - SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC)); + const SCEV *KnownMinTC; + bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale())); + // Use versions of TC and VF in which both are either scalable or fixed. + if (ScalableTC == MainLoopVF.isScalable()) + RemainingIterations = + SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC)); + else if (ScalableTC) { + const SCEV *EstimatedTC = SE.getMulExpr( + KnownMinTC, + SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1))); + RemainingIterations = SE.getURemExpr( + EstimatedTC, SE.getElementCount(TCType, MainLoopVF * IC)); + } else + RemainingIterations = + SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC)); // No iterations left to process in the epilogue. if (RemainingIterations->isZero()) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cdb9e7e..4fcaf6d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -17641,12 +17641,28 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { [](Value *V) { return !isa<GetElementPtrInst>(V) && isa<Instruction>(V); })) || - all_of(E->Scalars, [&](Value *V) { - return isa<PoisonValue>(V) || - (E->Idx == 0 && isa<InsertElementInst>(V)) || - E->isCopyableElement(V) || - (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V)); - })) + (all_of(E->Scalars, + [&](Value *V) { + return isa<PoisonValue>(V) || + (E->Idx == 0 && isa<InsertElementInst>(V)) || + E->isCopyableElement(V) || + (!isVectorLikeInstWithConstOps(V) && + isUsedOutsideBlock(V)); + }) && + (!E->doesNotNeedToSchedule() || + any_of(E->Scalars, + [&](Value *V) { + if (!isa<Instruction>(V) || + (E->hasCopyableElements() && E->isCopyableElement(V))) + return false; + return !areAllOperandsNonInsts(V); + }) || + none_of(E->Scalars, [&](Value *V) { + if (!isa<Instruction>(V) || + (E->hasCopyableElements() && E->isCopyableElement(V))) + return false; + return MustGather.contains(V); + })))) Res = FindLastInst(); else Res = FindFirstInst(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a1ad2db..5b9f005 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -398,7 +398,7 @@ public: DebugLoc DL = DebugLoc::getUnknown()) : VPDef(SC), VPUser(Operands), DL(DL) {} - virtual ~VPRecipeBase() = default; + ~VPRecipeBase() override = default; /// Clone the current recipe. 
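The SimplifyCFG.cpp change above lets simplifySwitchOfPowersOfTwo handle a reachable default: it emits a ctpop(Condition) == 1 guard that branches non-powers-of-two to the original default block, then rewrites each case value to its trailing-zero count so the cases become dense and lookup-table friendly. A source-level sketch of the before/after shapes, using C++20 <bit> in place of the ctpop/cttz intrinsics (function names are illustrative):

  #include <bit>

  // Input shape: at least four cases, all powers of two, default reachable.
  int beforeShape(unsigned X) {
    switch (X) {
    case 1:  return 10;
    case 2:  return 20;
    case 4:  return 30;
    case 8:  return 40;
    case 16: return 50;
    default: return -1;
    }
  }

  // After the transform: guard non-powers-of-two into the default block,
  // then switch on the trailing-zero count of X.
  int afterShape(unsigned X) {
    if (std::popcount(X) != 1)
      return -1; // original default destination
    switch (std::countr_zero(X)) {
    case 0: return 10;
    case 1: return 20;
    case 2: return 30;
    case 3: return 40;
    case 4: return 50;
    default: return -1; // still reaches the original default
    }
  }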
virtual VPRecipeBase *clone() = 0; @@ -576,7 +576,7 @@ public: return R && classof(R); } - virtual VPSingleDefRecipe *clone() override = 0; + VPSingleDefRecipe *clone() override = 0; /// Returns the underlying instruction. Instruction *getUnderlyingInstr() { @@ -907,7 +907,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { return R && classof(R); } - virtual VPRecipeWithIRFlags *clone() override = 0; + VPRecipeWithIRFlags *clone() override = 0; static inline bool classof(const VPSingleDefRecipe *U) { auto *R = dyn_cast<VPRecipeBase>(U); @@ -2068,7 +2068,7 @@ public: return classof(static_cast<const VPRecipeBase *>(R)); } - virtual void execute(VPTransformState &State) override = 0; + void execute(VPTransformState &State) override = 0; /// Returns the step value of the induction. VPValue *getStepValue() { return getOperand(1); } @@ -2557,7 +2557,7 @@ public: VPCostContext &Ctx) const override; /// Returns true if the recipe only uses the first lane of operand \p Op. - virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0; + bool onlyFirstLaneUsed(const VPValue *Op) const override = 0; /// Returns the number of stored operands of this interleave group. Returns 0 /// for load interleave groups. @@ -4172,11 +4172,6 @@ class VPlan { /// definitions are VPValues that hold a pointer to their underlying IR. SmallVector<VPValue *, 16> VPLiveIns; - /// Mapping from SCEVs to the VPValues representing their expansions. - /// NOTE: This mapping is temporary and will be removed once all users have - /// been modeled in VPlan directly. - DenseMap<const SCEV *, VPValue *> SCEVToExpansion; - /// Blocks allocated and owned by the VPlan. They will be deleted once the /// VPlan is destroyed. SmallVector<VPBlockBase *> CreatedBlocks; @@ -4424,15 +4419,6 @@ public: LLVM_DUMP_METHOD void dump() const; #endif - VPValue *getSCEVExpansion(const SCEV *S) const { - return SCEVToExpansion.lookup(S); - } - - void addSCEVExpansion(const SCEV *S, VPValue *V) { - assert(!SCEVToExpansion.contains(S) && "SCEV already expanded"); - SCEVToExpansion[S] = V; - } - /// Clone the current VPlan, update all VPValues of the new VPlan and cloned /// recipes to refer to the clones, and return it. VPlan *duplicate(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3e85e6f..84817d7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -943,12 +943,40 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) { } } +/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R. +/// Returns an optional pair, where the first element indicates whether it is +/// an intrinsic ID. +static std::optional<std::pair<bool, unsigned>> +getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) { + return TypeSwitch<const VPSingleDefRecipe *, + std::optional<std::pair<bool, unsigned>>>(R) + .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, + VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>( + [](auto *I) { return std::make_pair(false, I->getOpcode()); }) + .Case<VPWidenIntrinsicRecipe>([](auto *I) { + return std::make_pair(true, I->getVectorIntrinsicID()); + }) + .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) { + // For recipes that do not directly map to LLVM IR instructions, + // assign opcodes after the last VPInstruction opcode (which is also + // after the last IR Instruction opcode), based on the VPDefID. 
+ return std::make_pair(false, + VPInstruction::OpsEnd + 1 + I->getVPDefID()); + }) + .Default([](auto *) { return std::nullopt; }); +} + /// Try to fold \p R using InstSimplifyFolder. Will succeed and return a -/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are -/// foldable live-ins. -static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, - ArrayRef<VPValue *> Operands, - const DataLayout &DL, VPTypeAnalysis &TypeInfo) { +/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p +/// Operands are foldable live-ins. +static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R, + ArrayRef<VPValue *> Operands, + const DataLayout &DL, + VPTypeAnalysis &TypeInfo) { + auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R); + if (!OpcodeOrIID) + return nullptr; + SmallVector<Value *, 4> Ops; for (VPValue *Op : Operands) { if (!Op->isLiveIn() || !Op->getLiveInIRValue()) @@ -956,43 +984,57 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, Ops.push_back(Op->getLiveInIRValue()); } - InstSimplifyFolder Folder(DL); - if (Instruction::isBinaryOp(Opcode)) - return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0], + auto FoldToIRValue = [&]() -> Value * { + InstSimplifyFolder Folder(DL); + if (OpcodeOrIID->first) { + if (R.getNumOperands() != 2) + return nullptr; + unsigned ID = OpcodeOrIID->second; + return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1], + TypeInfo.inferScalarType(&R)); + } + unsigned Opcode = OpcodeOrIID->second; + if (Instruction::isBinaryOp(Opcode)) + return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), + Ops[0], Ops[1]); + if (Instruction::isCast(Opcode)) + return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0], + TypeInfo.inferScalarType(R.getVPSingleValue())); + switch (Opcode) { + case VPInstruction::LogicalAnd: + return Folder.FoldSelect(Ops[0], Ops[1], + ConstantInt::getNullValue(Ops[1]->getType())); + case VPInstruction::Not: + return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0], + Constant::getAllOnesValue(Ops[0]->getType())); + case Instruction::Select: + return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ICmp: + case Instruction::FCmp: + return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0], Ops[1]); - if (Instruction::isCast(Opcode)) - return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0], - TypeInfo.inferScalarType(R.getVPSingleValue())); - switch (Opcode) { - case VPInstruction::LogicalAnd: - return Folder.FoldSelect(Ops[0], Ops[1], - ConstantInt::getNullValue(Ops[1]->getType())); - case VPInstruction::Not: - return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0], - Constant::getAllOnesValue(Ops[0]->getType())); - case Instruction::Select: - return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]); - case Instruction::ICmp: - case Instruction::FCmp: - return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0], - Ops[1]); - case Instruction::GetElementPtr: { - auto &RFlags = cast<VPRecipeWithIRFlags>(R); - auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr()); - return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops), - RFlags.getGEPNoWrapFlags()); - } - case VPInstruction::PtrAdd: - case VPInstruction::WidePtrAdd: - return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0], - Ops[1], - cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags()); - // An extract of a live-in is an extract of a broadcast, so return the - // 
broadcasted element. - case Instruction::ExtractElement: - assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar"); - return Ops[0]; - } + case Instruction::GetElementPtr: { + auto &RFlags = cast<VPRecipeWithIRFlags>(R); + auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr()); + return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], + drop_begin(Ops), RFlags.getGEPNoWrapFlags()); + } + case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: + return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), + Ops[0], Ops[1], + cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags()); + // An extract of a live-in is an extract of a broadcast, so return the + // broadcasted element. + case Instruction::ExtractElement: + assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar"); + return Ops[0]; + } + return nullptr; + }; + + if (Value *V = FoldToIRValue()) + return R.getParent()->getPlan()->getOrAddLiveIn(V); return nullptr; } @@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { // Simplification of live-in IR values for SingleDef recipes using // InstSimplifyFolder. - if (TypeSwitch<VPRecipeBase *, bool>(&R) - .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, - VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) { - const DataLayout &DL = - Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout(); - Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL, - TypeInfo); - if (V) - I->replaceAllUsesWith(Plan->getOrAddLiveIn(V)); - return V; - }) - .Default([](auto *) { return false; })) - return; + const DataLayout &DL = + Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout(); + if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo)) + return Def->replaceAllUsesWith(V); // Fold PredPHI LiveIn -> LiveIn. if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) { @@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> { return Def == getEmptyKey() || Def == getTombstoneKey(); } - /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R. - /// Returns an optional pair, where the first element indicates whether it is - /// an intrinsic ID. - static std::optional<std::pair<bool, unsigned>> - getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) { - return TypeSwitch<const VPSingleDefRecipe *, - std::optional<std::pair<bool, unsigned>>>(R) - .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe, - VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>( - [](auto *I) { return std::make_pair(false, I->getOpcode()); }) - .Case<VPWidenIntrinsicRecipe>([](auto *I) { - return std::make_pair(true, I->getVectorIntrinsicID()); - }) - .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) { - // For recipes that do not directly map to LLVM IR instructions, - // assign opcodes after the last VPInstruction opcode (which is also - // after the last IR Instruction opcode), based on the VPDefID. - return std::make_pair(false, - VPInstruction::OpsEnd + 1 + I->getVPDefID()); - }) - .Default([](auto *) { return std::nullopt; }); - } - /// If recipe \p R will lower to a GEP with a non-i8 source element type, /// return that source element type. 
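The VPlanTransforms.cpp rework above funnels all foldable single-def recipes through one tryToFoldLiveIns helper: getOpcodeOrIntrinsicID (moved out of the CSE code) classifies the recipe, InstSimplifyFolder does the actual folding, and any folded IR constant is re-wrapped as a VPlan live-in. A minimal, hedged illustration of the folder half of that, written against LLVM's public API directly rather than the VPlan types:

  #include "llvm/Analysis/InstSimplifyFolder.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/LLVMContext.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    DataLayout DL("");
    InstSimplifyFolder Folder(DL);

    Type *I32 = Type::getInt32Ty(Ctx);
    // Folds to the constant 5; returns nullptr when the operands cannot be
    // folded, which is how tryToFoldLiveIns signals "leave the recipe alone".
    Value *Folded = Folder.FoldBinOp(Instruction::Add, ConstantInt::get(I32, 2),
                                     ConstantInt::get(I32, 3));
    return Folded && cast<ConstantInt>(Folded)->equalsInt(5) ? 0 : 1;
  }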
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) { @@ -4119,7 +4129,7 @@ static bool isAlreadyNarrow(VPValue *VPV) { void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, unsigned VectorRegWidth) { VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); - if (!VectorLoop) + if (!VectorLoop || VectorLoop->getEntry()->getNumSuccessors() != 0) return; VPTypeAnalysis TypeInfo(Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 06c3d75..4db92e7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) { } VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { - if (auto *Expanded = Plan.getSCEVExpansion(Expr)) - return Expanded; VPValue *Expanded = nullptr; if (auto *E = dyn_cast<SCEVConstant>(Expr)) Expanded = Plan.getOrAddLiveIn(E->getValue()); @@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); } } - Plan.addSCEVExpansion(Expr, Expanded); return Expanded; } @@ -92,6 +89,60 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); }); } +bool vputils::isSingleScalar(const VPValue *VPV) { + auto PreservesUniformity = [](unsigned Opcode) -> bool { + if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) + return true; + switch (Opcode) { + case Instruction::GetElementPtr: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Select: + case VPInstruction::Not: + case VPInstruction::Broadcast: + case VPInstruction::PtrAdd: + return true; + default: + return false; + } + }; + + // A live-in must be uniform across the scope of VPlan. + if (VPV->isLiveIn()) + return true; + + if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) { + const VPRegionBlock *RegionOfR = Rep->getRegion(); + // Don't consider recipes in replicate regions as uniform yet; their first + // lane cannot be accessed when executing the replicate region for other + // lanes. + if (RegionOfR && RegionOfR->isReplicator()) + return false; + return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) && + all_of(Rep->operands(), isSingleScalar)); + } + if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe, + VPWidenSelectRecipe>(VPV)) + return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar); + if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) { + return PreservesUniformity(WidenR->getOpcode()) && + all_of(WidenR->operands(), isSingleScalar); + } + if (auto *VPI = dyn_cast<VPInstruction>(VPV)) + return VPI->isSingleScalar() || VPI->isVectorToScalar() || + (PreservesUniformity(VPI->getOpcode()) && + all_of(VPI->operands(), isSingleScalar)); + if (isa<VPPartialReductionRecipe>(VPV)) + return false; + if (isa<VPReductionRecipe>(VPV)) + return true; + if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV)) + return Expr->isSingleScalar(); + + // VPExpandSCEVRecipes must be placed in the entry and are always uniform. + return isa<VPExpandSCEVRecipe>(VPV); +} + bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { // Live-ins are uniform. 
if (V->isLiveIn()) diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 840a5b9..37cd413 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -41,59 +41,7 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE); /// Returns true if \p VPV is a single scalar, either because it produces the /// same value for all lanes or only has its first lane used. -inline bool isSingleScalar(const VPValue *VPV) { - auto PreservesUniformity = [](unsigned Opcode) -> bool { - if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) - return true; - switch (Opcode) { - case Instruction::GetElementPtr: - case Instruction::ICmp: - case Instruction::FCmp: - case Instruction::Select: - case VPInstruction::Not: - case VPInstruction::Broadcast: - case VPInstruction::PtrAdd: - return true; - default: - return false; - } - }; - - // A live-in must be uniform across the scope of VPlan. - if (VPV->isLiveIn()) - return true; - - if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) { - const VPRegionBlock *RegionOfR = Rep->getRegion(); - // Don't consider recipes in replicate regions as uniform yet; their first - // lane cannot be accessed when executing the replicate region for other - // lanes. - if (RegionOfR && RegionOfR->isReplicator()) - return false; - return Rep->isSingleScalar() || (PreservesUniformity(Rep->getOpcode()) && - all_of(Rep->operands(), isSingleScalar)); - } - if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe, - VPWidenSelectRecipe>(VPV)) - return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar); - if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) { - return PreservesUniformity(WidenR->getOpcode()) && - all_of(WidenR->operands(), isSingleScalar); - } - if (auto *VPI = dyn_cast<VPInstruction>(VPV)) - return VPI->isSingleScalar() || VPI->isVectorToScalar() || - (PreservesUniformity(VPI->getOpcode()) && - all_of(VPI->operands(), isSingleScalar)); - if (isa<VPPartialReductionRecipe>(VPV)) - return false; - if (isa<VPReductionRecipe>(VPV)) - return true; - if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV)) - return Expr->isSingleScalar(); - - // VPExpandSCEVRecipes must be placed in the entry and are alway uniform. - return isa<VPExpandSCEVRecipe>(VPV); -} +bool isSingleScalar(const VPValue *VPV); /// Return true if \p V is a header mask in \p Plan. bool isHeaderMask(const VPValue *V, const VPlan &Plan); |
