author     Paul Walker <paul.walker@arm.com>  2025-06-11 11:02:32 +0100
committer  GitHub <noreply@github.com>        2025-06-11 11:02:32 +0100
commit     ea9046699eae04ac5159a1666f19b5b32e5d41c1 (patch)
tree       92234623c59e09e43223a26e39af2985f83b4cbe /llvm/lib
parent     c59cc2b690b9e528a82ba214f74a8f7c8abb3cde (diff)
[LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (#130973)
For functions whose vscale_range attribute is limited to a single value, the
size of scalable vectors is known at compile time. This aids SROA by allowing
scalable vector load and store operations to be considered for replacement,
whereby bitcasts through memory can be replaced by vector insert or extract
operations.
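
As a hypothetical illustration (the function below is invented for this
summary, not taken from the patch or its tests): under vscale_range(2,2),
<vscale x 4 x i32> is known to be exactly 256 bits, the same size as
<8 x i32>, so SROA can now promote the alloca in

define <vscale x 4 x i32> @fixed_to_scalable(<8 x i32> %v) vscale_range(2,2) {
  %tmp = alloca <8 x i32>
  store <8 x i32> %v, ptr %tmp               ; "bitcast" through memory ...
  %cast = load <vscale x 4 x i32>, ptr %tmp  ; ... reloaded as a scalable type
  ret <vscale x 4 x i32> %cast
}

and replace the load/store pair with a single vector insert:

  %cast = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> %v, i64 0)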
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/IR/Function.cpp             |  12
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp  | 165
2 files changed, 136 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 63665d8..493dec7 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1165,6 +1165,18 @@ bool Function::nullPointerIsDefined() const {
   return hasFnAttribute(Attribute::NullPointerIsValid);
 }
 
+unsigned Function::getVScaleValue() const {
+  Attribute Attr = getFnAttribute(Attribute::VScaleRange);
+  if (!Attr.isValid())
+    return 0;
+
+  unsigned VScale = Attr.getVScaleRangeMin();
+  if (VScale && VScale == Attr.getVScaleRangeMax())
+    return VScale;
+
+  return 0;
+}
+
 bool llvm::NullPointerIsDefined(const Function *F, unsigned AS) {
   if (F && F->nullPointerIsDefined())
     return true;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a4e373d..42d1d9a 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1120,8 +1120,13 @@ private:
       return PI.setEscapedReadOnly(&LI);
 
     TypeSize Size = DL.getTypeStoreSize(LI.getType());
-    if (Size.isScalable())
-      return PI.setAborted(&LI);
+    if (Size.isScalable()) {
+      unsigned VScale = LI.getFunction()->getVScaleValue();
+      if (!VScale)
+        return PI.setAborted(&LI);
+
+      Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
+    }
 
     return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
                              LI.isVolatile());
@@ -1135,8 +1140,13 @@ private:
       return PI.setAborted(&SI);
 
     TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
-    if (StoreSize.isScalable())
-      return PI.setAborted(&SI);
+    if (StoreSize.isScalable()) {
+      unsigned VScale = SI.getFunction()->getVScaleValue();
+      if (!VScale)
+        return PI.setAborted(&SI);
+
+      StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
+    }
 
     uint64_t Size = StoreSize.getFixedValue();
@@ -1927,7 +1937,8 @@ static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
 /// ensure that we only try to convert viable values. The strategy is that we
 /// will peel off single element struct and array wrappings to get to an
 /// underlying value, and convert that value.
-static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
+                            unsigned VScale = 0) {
   if (OldTy == NewTy)
     return true;
 
@@ -1941,8 +1952,35 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
     return false;
   }
 
-  if (DL.getTypeSizeInBits(NewTy).getFixedValue() !=
-      DL.getTypeSizeInBits(OldTy).getFixedValue())
+  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
+  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
+
+  if ((isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) ||
+      (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy))) {
+    // Conversion is only possible when the size of scalable vectors is known.
+    if (!VScale)
+      return false;
+
+    // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
+    // a single domain (either fixed or scalable). Any additional conversion
+    // between fixed and scalable types is handled through integer types.
+    auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
+    auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
+
+    if (isa<ScalableVectorType>(NewTy)) {
+      if (!VectorType::getWithSizeAndScalar(cast<VectorType>(NewVTy), OldVTy))
+        return false;
+
+      NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
+    } else {
+      if (!VectorType::getWithSizeAndScalar(cast<VectorType>(OldVTy), NewVTy))
+        return false;
+
+      OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
+    }
+  }
+
+  if (NewSize != OldSize)
     return false;
 
   if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
     return false;
@@ -1992,7 +2030,14 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                            Type *NewTy) {
   Type *OldTy = V->getType();
-  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+#ifndef NDEBUG
+  BasicBlock *BB = IRB.GetInsertBlock();
+  assert(BB && BB->getParent() && "VScale unknown!");
+  unsigned VScale = BB->getParent()->getVScaleValue();
+  assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
+         "Value not convertable to type");
+#endif
 
   if (OldTy == NewTy)
     return V;
@@ -2000,13 +2045,41 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
   assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
          "Integer types must be the exact same to convert.");
 
+  // A variant of bitcast that supports a mixture of fixed and scalable types
+  // that are known to have the same size.
+  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
+    Type *InTy = In->getType();
+    if (InTy == Ty)
+      return In;
+
+    if (isa<FixedVectorType>(InTy) && isa<ScalableVectorType>(Ty)) {
+      // For vscale_range(2) expand <4 x i32> to <vscale x 4 x i16> -->
+      //   <4 x i32> to <vscale x 2 x i32> to <vscale x 4 x i16>
+      auto *VTy = VectorType::getWithSizeAndScalar(cast<VectorType>(Ty), InTy);
+      return IRB.CreateBitCast(
+          IRB.CreateInsertVector(VTy, PoisonValue::get(VTy), In,
+                                 IRB.getInt64(0)),
+          Ty);
+    }
+
+    if (isa<ScalableVectorType>(InTy) && isa<FixedVectorType>(Ty)) {
+      // For vscale_range(2) expand <vscale x 4 x i16> to <4 x i32> -->
+      //   <vscale x 4 x i16> to <vscale x 2 x i32> to <4 x i32>
+      auto *VTy = VectorType::getWithSizeAndScalar(cast<VectorType>(InTy), Ty);
+      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
+                                     IRB.getInt64(0));
+    }
+
+    return IRB.CreateBitCast(In, Ty);
+  };
+
   // See if we need inttoptr for this type pair. May require additional bitcast.
   if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
     // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
     // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
     // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
     // Directly handle i64 to i8*
-    return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+    return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
                               NewTy);
   }
 
@@ -2016,7 +2089,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
     // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
     // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
     // Expand i8* to i64 --> i8* to i64 to i64
-    return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+    return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                              NewTy);
   }
 
@@ -2031,12 +2104,14 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
     // size.
     if (OldAS != NewAS) {
       assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
-      return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
-                                NewTy);
+      return IRB.CreateIntToPtr(
+          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+                            DL.getIntPtrType(NewTy)),
+          NewTy);
     }
   }
 
-  return IRB.CreateBitCast(V, NewTy);
+  return CreateBitCastLike(V, NewTy);
 }
 
 /// Test whether the given slice use can be promoted to a vector.
@@ -2046,7 +2121,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
 static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
                                             VectorType *Ty,
                                             uint64_t ElementSize,
-                                            const DataLayout &DL) {
+                                            const DataLayout &DL,
+                                            unsigned VScale) {
   // First validate the slice offsets.
   uint64_t BeginOffset =
       std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
@@ -2090,7 +2166,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
       assert(LTy->isIntegerTy());
       LTy = SplitIntTy;
     }
-    if (!canConvertValue(DL, SliceTy, LTy))
+    if (!canConvertValue(DL, SliceTy, LTy, VScale))
       return false;
   } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
     if (SI->isVolatile())
       return false;
@@ -2103,7 +2179,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
       assert(STy->isIntegerTy());
       STy = SplitIntTy;
     }
-    if (!canConvertValue(DL, STy, SliceTy))
+    if (!canConvertValue(DL, STy, SliceTy, VScale))
       return false;
   } else {
     return false;
   }
@@ -2118,7 +2194,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
 /// (and thus isVectorPromotionViable) over all slices of the alloca for the
 /// given VectorType.
 static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
-                                        const DataLayout &DL) {
+                                        const DataLayout &DL, unsigned VScale) {
   uint64_t ElementSize =
       DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
 
@@ -2131,11 +2207,11 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
   ElementSize /= 8;
 
   for (const Slice &S : P)
-    if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
+    if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
      return false;
 
   for (const Slice *S : P.splitSliceTails())
-    if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
+    if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
      return false;
 
   return true;
@@ -2150,7 +2226,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
                              SmallVectorImpl<VectorType *> &CandidateTys,
                              bool HaveCommonEltTy, Type *CommonEltTy,
                              bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
-                             VectorType *CommonVecPtrTy) {
+                             VectorType *CommonVecPtrTy, unsigned VScale) {
   // If we didn't find a vector type, nothing to do here.
   if (CandidateTys.empty())
     return nullptr;
@@ -2226,7 +2302,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
   });
 
   for (VectorType *VTy : CandidateTys)
-    if (checkVectorTypeForPromotion(P, VTy, DL))
+    if (checkVectorTypeForPromotion(P, VTy, DL, VScale))
       return VTy;
 
   return nullptr;
@@ -2237,7 +2313,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
     function_ref<void(Type *)> CheckCandidateType, Partition &P,
     const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
     bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
-    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
+    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
   [[maybe_unused]] VectorType *OriginalElt =
       CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
   // Consider additional vector types where the element type size is a
@@ -2262,9 +2338,9 @@ static VectorType *createAndCheckVectorTypesForPromotion(
     }
   }
 
-  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
-                                      CommonEltTy, HaveVecPtrTy,
-                                      HaveCommonVecPtrTy, CommonVecPtrTy);
+  return checkVectorTypesForPromotion(
+      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
 }
 
 /// Test whether the given alloca partitioning and range of slices can be
@@ -2276,7 +2352,8 @@ static VectorType *createAndCheckVectorTypesForPromotion(
 /// SSA value. We only can ensure this for a limited set of operations, and we
 /// don't want to do the rewrites unless we are confident that the result will
 /// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
+                                           unsigned VScale) {
   // Collect the candidate types for vector-based promotion. Also track whether
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;
@@ -2288,7 +2365,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   bool HaveCommonEltTy = true;
   bool HaveCommonVecPtrTy = true;
   auto CheckCandidateType = [&](Type *Ty) {
-    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
       // Return if bitcast to vectors is different for total size in bits.
       if (!CandidateTys.empty()) {
         VectorType *V = CandidateTys[0];
@@ -2343,14 +2420,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   if (auto *VTy = createAndCheckVectorTypesForPromotion(
           LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
           CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
-          HaveCommonVecPtrTy, CommonVecPtrTy))
+          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
     return VTy;
 
   CandidateTys.clear();
   return createAndCheckVectorTypesForPromotion(
       DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
       HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
-      CommonVecPtrTy);
+      CommonVecPtrTy, VScale);
 }
 
 /// Test whether a slice of an alloca is valid for integer widening.
@@ -2387,7 +2464,8 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
     if (LI->isVolatile())
       return false;
     // We can't handle loads that extend past the allocated memory.
-    if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)
+    TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
+    if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size)
       return false;
     // So far, AllocaSliceRewriter does not support widening split slice tails
     // in rewriteIntegerLoad.
@@ -2412,7 +2490,8 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
     if (SI->isVolatile())
       return false;
     // We can't handle stores that extend past the allocated memory.
-    if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size)
+    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
+    if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size)
       return false;
     // So far, AllocaSliceRewriter does not support widening split slice tails
     // in rewriteIntegerStore.
@@ -2885,8 +2964,6 @@ private:
     Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                              : LI.getType();
-    const bool IsLoadPastEnd =
-        DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
     bool IsPtrAdjusted = false;
     Value *V;
     if (VecTy) {
@@ -2896,8 +2973,9 @@ private:
     } else if (NewBeginOffset == NewAllocaBeginOffset &&
                NewEndOffset == NewAllocaEndOffset &&
                (canConvertValue(DL, NewAllocaTy, TargetTy) ||
-                (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
-                 TargetTy->isIntegerTy() && !LI.isVolatile()))) {
+                (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() &&
+                 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
+                 !LI.isVolatile()))) {
       Value *NewPtr =
           getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
       LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
@@ -3070,7 +3148,8 @@ private:
     if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
       Pass.PostPromotionWorklist.insert(AI);
 
-    if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) {
+    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
+    if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) {
       assert(!SI.isVolatile());
       assert(V->getType()->isIntegerTy() &&
              "Only integer type loads and stores are split");
@@ -4846,14 +4925,18 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   Type *SliceTy = nullptr;
   VectorType *SliceVecTy = nullptr;
   const DataLayout &DL = AI.getDataLayout();
+  unsigned VScale = AI.getFunction()->getVScaleValue();
+
   std::pair<Type *, IntegerType *> CommonUseTy =
       findCommonType(P.begin(), P.end(), P.endOffset());
   // Do all uses operate on the same type?
-  if (CommonUseTy.first)
-    if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) {
+  if (CommonUseTy.first) {
+    TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
+    if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.first;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);
    }
+  }
   // If not, can we find an appropriate subtype in the original allocated type?
   if (!SliceTy)
     if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
@@ -4874,12 +4957,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
 
   // If the common use types are not viable for promotion then attempt to find
   // another type that is viable.
-  if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL))
+  if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale))
     if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                  P.beginOffset(), P.size())) {
       VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
       if (TypePartitionVecTy &&
-          checkVectorTypeForPromotion(P, TypePartitionVecTy, DL))
+          checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale))
         SliceTy = TypePartitionTy;
     }
 
@@ -4890,7 +4973,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
 
   VectorType *VecTy =
-      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
+      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL, VScale);
 
   if (VecTy)
     SliceTy = VecTy;
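
The comments inside CreateBitCastLike describe a two-step expansion when the
element types of the fixed and scalable vectors differ. A sketch of the IR
this would produce for the fixed-to-scalable direction (hypothetical function
name, assuming vscale_range(2,2) so that <4 x i32>, <vscale x 2 x i32> and
<vscale x 4 x i16> are all 128 bits):

define <vscale x 4 x i16> @cast_like(<4 x i32> %in) vscale_range(2,2) {
  ; Step 1: insert into a scalable vector that keeps the i32 element type.
  %tmp = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> poison, <4 x i32> %in, i64 0)
  ; Step 2: an ordinary scalable-to-scalable bitcast to the requested type.
  %out = bitcast <vscale x 2 x i32> %tmp to <vscale x 4 x i16>
  ret <vscale x 4 x i16> %out
}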