Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp')
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp | 224
1 file changed, 83 insertions(+), 141 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index dd68a55..0565fcd 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -131,25 +131,56 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
: Constant::getAllOnesValue(XLenTy);
return true;
}
- auto *VPLdSt = cast<VPIntrinsic>(I);
- assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
- VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
- "Unexpected intrinsic");
- Ptr = VPLdSt->getMemoryPointerParam();
- Alignment = VPLdSt->getPointerAlignment().value_or(
- DL.getABITypeAlign(VTy->getElementType()));
-
- assert(Mask && "vp.load and vp.store needs a mask!");
-
- Value *WideEVL = VPLdSt->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
- return false;
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- return true;
+ auto *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported intrinsic type");
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store: {
+ auto *VPLdSt = cast<VPIntrinsic>(I);
+ Ptr = VPLdSt->getMemoryPointerParam();
+ Alignment = VPLdSt->getPointerAlignment().value_or(
+ DL.getABITypeAlign(VTy->getElementType()));
+
+ assert(Mask && "vp.load and vp.store needs a mask!");
+
+ Value *WideEVL = VPLdSt->getVectorLengthParam();
+ // Conservatively check if EVL is a multiple of factor, otherwise some
+ // (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+ return false;
+
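+ // Illustrative: with Factor = 2 and a wide EVL of 8, the per-segment VL
+ // below becomes 8 / 2 = 4; the exact udiv is justified by the
+ // multiple-of-factor check above.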
+ auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+ VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+ return true;
+ }
+ case Intrinsic::masked_load: {
+ Ptr = II->getOperand(0);
+ Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue();
+
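+ // Operand 3 of masked.load is the passthru; bail out unless it is undef,
+ // since merging inactive lanes back in is not handled here.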
+ if (!isa<UndefValue>(II->getOperand(3)))
+ return false;
+
+ assert(Mask && "masked.load needs a mask!");
+
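+ // For a fixed vector, VL is its exact element count; for a scalable
+ // vector, an all-ones value requests VLMAX.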
+ VL = isa<FixedVectorType>(VTy)
+ ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ case Intrinsic::masked_store: {
+ Ptr = II->getOperand(1);
+ Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue();
+
+ assert(Mask && "masked.store needs a mask!");
+
+ VL = isa<FixedVectorType>(VTy)
+ ? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ }
}
/// Lower an interleaved load into a vlsegN intrinsic.
@@ -173,7 +204,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
const DataLayout &DL = Load->getDataLayout();
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
- auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+ auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
Value *Ptr, *VL;
Align Alignment;
@@ -201,6 +232,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{VTy, BasePtr->getType(), Stride->getType()},
{BasePtr, Stride, Mask, VL});
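+ // The lane's base pointer is Indices[0] elements past Ptr, so narrow the
+ // known alignment to the common alignment of the two.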
+ Alignment = commonAlignment(Alignment, Indices[0] * ScalarSizeInBytes);
CI->addParamAttr(0,
Attribute::getWithAlignment(CI->getContext(), Alignment));
Shuffles[0]->replaceAllUsesWith(CI);
@@ -234,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
+ Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
- IRBuilder<> Builder(SI);
- const DataLayout &DL = SI->getDataLayout();
+ IRBuilder<> Builder(Store);
+ const DataLayout &DL = Store->getDataLayout();
auto Mask = SVI->getShuffleMask();
auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
// Given SVI : <n*factor x ty>, then VTy : <n x ty>
auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
ShuffleVTy->getNumElements() / Factor);
- if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(), DL))
+ auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
+
+ Value *Ptr, *VL;
+ Align Alignment;
+ if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
return false;
- auto *PtrTy = SI->getPointerOperandType();
- auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+ Type *PtrTy = Ptr->getType();
+ unsigned AS = PtrTy->getPointerAddressSpace();
+ if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ return false;
unsigned Index;
// If the segment store only has one active lane (i.e. the interleave is
@@ -260,26 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
unsigned ScalarSizeInBytes =
DL.getTypeStoreSize(ShuffleVTy->getElementType());
Value *Data = SVI->getOperand(0);
- auto *DataVTy = cast<FixedVectorType>(Data->getType());
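+ // Only one lane is live, so the low <n x ty> subvector of the wide shuffle
+ // source holds exactly the elements being stored.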
+ Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0));
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
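+ // e.g. with Factor = 3, i32 elements, and active lane Index = 1: the
+ // stride is 12 bytes and the base offset into the segment is 4 bytes.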
- Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
- Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
- Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
- VTy->getElementCount());
-
- CallInst *CI = Builder.CreateIntrinsic(
- Intrinsic::experimental_vp_strided_store,
- {Data->getType(), BasePtr->getType(), Stride->getType()},
- {Data, BasePtr, Stride, Mask, VL});
- CI->addParamAttr(
- 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
+ Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+ // Note: Same VL as above, but i32 not xlen due to signature of
+ // vp.strided.store
+ VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+ VTy->getElementCount());
+ CallInst *CI =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+ {VTy, BasePtr->getType(), Stride->getType()},
+ {Data, BasePtr, Stride, LaneMask, VL});
+ Alignment = commonAlignment(Alignment, Index * ScalarSizeInBytes);
+ CI->addParamAttr(1,
+ Attribute::getWithAlignment(CI->getContext(), Alignment));
return true;
}
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+ Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops;
SmallVector<int, 16> NewShuffleMask;
@@ -295,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
NewShuffleMask.clear();
}
- // This VL should be OK (should be executable in one vsseg instruction,
- // potentially under larger LMULs) because we checked that the fixed vector
- // type fits in isLegalInterleavedAccessType
- Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
- Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
- Ops.append({SI->getPointerOperand(), StoreMask, VL});
-
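+ // vsseg operand order: the Factor de-interleaved data vectors already in
+ // Ops, then the pointer, the segment mask, and VL.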
+ Ops.append({Ptr, LaneMask, VL});
Builder.CreateCall(VssegNFunc, Ops);
return true;
@@ -318,7 +351,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
VectorType *ResVTy = getDeinterleavedVectorType(DI);
const DataLayout &DL = Load->getDataLayout();
- auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+ auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
Value *Ptr, *VL;
Align Alignment;
@@ -339,8 +372,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
Type *VecTupTy = TargetExtType::get(
Load->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
- NumElts * SEW / 8),
+ ScalableVectorType::get(Builder.getInt8Ty(), NumElts * SEW / 8),
Factor);
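+ // Sketch of the tuple just built: Factor fields, each a scalable i8 vector
+ // spanning NumElts * SEW / 8 bytes per vscale unit (one segment field).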
Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
Load->getModule(), ScalableVlsegIntrIds[Factor - 2],
@@ -381,7 +413,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
const DataLayout &DL = Store->getDataLayout();
- Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+ Type *XLenTy = Builder.getIntNTy(Subtarget.getXLen());
Value *Ptr, *VL;
Align Alignment;
@@ -405,9 +437,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
Type *VecTupTy = TargetExtType::get(
Store->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
- NumElts * SEW / 8),
- Factor);
+ ScalableVectorType::get(Builder.getInt8Ty(), NumElts * SEW / 8), Factor);
Value *StoredVal = PoisonValue::get(VecTupTy);
for (unsigned i = 0; i < Factor; ++i)
@@ -424,91 +454,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
Builder.CreateCall(VssegNFunc, Operands);
return true;
}
-
-/// Lower an interleaved vp.store into a vssegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.store (Factor = 2):
-///
-/// %is = tail call <vscale x 64 x i8>
-/// @llvm.vector.interleave2.nxv64i8(
-/// <vscale x 32 x i8> %load0,
-/// <vscale x 32 x i8> %load1
-/// %wide.rvl = shl nuw nsw i32 %rvl, 1
-/// tail call void @llvm.vp.store.nxv64i8.p0(
-/// <vscale x 64 x i8> %is, ptr %ptr,
-/// %mask,
-/// i32 %wide.rvl)
-///
-/// Into:
-/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
-/// <vscale x 32 x i8> %load1,
-/// <vscale x 32 x i8> %load2, ptr %ptr,
-/// %mask,
-/// i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedVPStore(
- VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOperands) const {
- assert(Mask && "Expect a valid mask");
- assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
- "Unexpected intrinsic");
-
- const unsigned Factor = InterleaveOperands.size();
-
- auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
- if (!VTy)
- return false;
-
- const DataLayout &DL = Store->getDataLayout();
- Align Alignment = Store->getParamAlign(1).value_or(
- DL.getABITypeAlign(VTy->getElementType()));
- if (!isLegalInterleavedAccessType(
- VTy, Factor, Alignment,
- Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
- return false;
-
- IRBuilder<> Builder(Store);
- Value *WideEVL = Store->getArgOperand(3);
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
- return false;
-
- auto *PtrTy = Store->getArgOperand(1)->getType();
- auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- Value *EVL =
- Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
- if (isa<FixedVectorType>(VTy)) {
- SmallVector<Value *, 8> Operands(InterleaveOperands);
- Operands.append({Store->getArgOperand(1), Mask, EVL});
- Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
- {VTy, PtrTy, XLenTy}, Operands);
- return true;
- }
-
- unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
- unsigned NumElts = VTy->getElementCount().getKnownMinValue();
- Type *VecTupTy = TargetExtType::get(
- Store->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
- NumElts * SEW / 8),
- Factor);
-
- Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
- Value *StoredVal = PoisonValue::get(VecTupTy);
- for (unsigned i = 0; i < Factor; ++i)
- StoredVal = Builder.CreateCall(
- VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
-
- Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), ScalableVssegIntrIds[Factor - 2],
- {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
- Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
- ConstantInt::get(XLenTy, Log2_64(SEW))};
-
- Builder.CreateCall(VssegNFunc, Operands);
- return true;
-}