diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 83 |
1 files changed, 80 insertions, 3 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 31b3d18..fbce3b0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16249,7 +16249,9 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { bool Negated; uint64_t SplatVal; - if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) { + // NOTE: SRAD cannot be used to represent sdiv-by-one. + if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated) && + SplatVal > 1) { SDValue Pg = getPredicateForScalableVector(DAG, DL, VT); SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0), @@ -18638,7 +18640,7 @@ bool AArch64TargetLowering::isDesirableToCommuteXorWithShift( } bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && @@ -30034,7 +30036,9 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( bool Negated; uint64_t SplatVal; - if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) { + // NOTE: SRAD cannot be used to represent sdiv-by-one. + if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated) && + SplatVal > 1) { EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32); @@ -30606,6 +30610,43 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op, assert(OpVT.isScalableVector() && "Expected scalable vector in LowerVECTOR_DEINTERLEAVE."); + if (Op->getNumOperands() == 3) { + // aarch64_sve_ld3 only supports packed datatypes. + EVT PackedVT = getPackedSVEVectorVT(OpVT.getVectorElementCount()); + Align Alignment = DAG.getReducedAlign(PackedVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(PackedVT.getStoreSize() * 3, Alignment); + + // Write out unmodified operands. + SmallVector<SDValue, 3> Chains; + for (unsigned I = 0; I < 3; ++I) { + SDValue Ptr = + DAG.getMemBasePlusOffset(StackPtr, PackedVT.getStoreSize() * I, DL); + SDValue V = getSVESafeBitCast(PackedVT, Op.getOperand(I), DAG); + Chains.push_back( + DAG.getStore(DAG.getEntryNode(), DL, V, Ptr, MachinePointerInfo())); + } + + Intrinsic::ID IntID = Intrinsic::aarch64_sve_ld3_sret; + EVT PredVT = PackedVT.changeVectorElementType(MVT::i1); + + SmallVector<SDValue, 7> Ops; + Ops.push_back(DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains)); + Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64)); + Ops.push_back(DAG.getConstant(1, DL, PredVT)); + Ops.push_back(StackPtr); + + // Read back and deinterleave data. + SDVTList VTs = DAG.getVTList(PackedVT, PackedVT, PackedVT, MVT::Other); + SDValue LD3 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops); + + SmallVector<SDValue, 3> Results; + Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(0), DAG)); + Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(1), DAG)); + Results.push_back(getSVESafeBitCast(OpVT, LD3.getValue(2), DAG)); + return DAG.getMergeValues(Results, DL); + } + // Are multi-register uzp instructions available? if (Subtarget->hasSME2() && Subtarget->isStreaming() && OpVT.getVectorElementType() != MVT::i1) { @@ -30647,6 +30688,42 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op, assert(OpVT.isScalableVector() && "Expected scalable vector in LowerVECTOR_INTERLEAVE."); + if (Op->getNumOperands() == 3) { + // aarch64_sve_st3 only supports packed datatypes. + EVT PackedVT = getPackedSVEVectorVT(OpVT.getVectorElementCount()); + SmallVector<SDValue, 3> InVecs; + for (SDValue V : Op->ops()) + InVecs.push_back(getSVESafeBitCast(PackedVT, V, DAG)); + + Align Alignment = DAG.getReducedAlign(PackedVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(PackedVT.getStoreSize() * 3, Alignment); + + Intrinsic::ID IntID = Intrinsic::aarch64_sve_st3; + EVT PredVT = PackedVT.changeVectorElementType(MVT::i1); + + SmallVector<SDValue, 7> Ops; + Ops.push_back(DAG.getEntryNode()); + Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64)); + Ops.append(InVecs); + Ops.push_back(DAG.getConstant(1, DL, PredVT)); + Ops.push_back(StackPtr); + + // Interleave operands and store. + SDValue Chain = DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops); + + // Read back the interleaved data. + SmallVector<SDValue, 3> Results; + for (unsigned I = 0; I < 3; ++I) { + SDValue Ptr = + DAG.getMemBasePlusOffset(StackPtr, PackedVT.getStoreSize() * I, DL); + SDValue L = DAG.getLoad(PackedVT, DL, Chain, Ptr, MachinePointerInfo()); + Results.push_back(getSVESafeBitCast(OpVT, L, DAG)); + } + + return DAG.getMergeValues(Results, DL); + } + // Are multi-register zip instructions available? if (Subtarget->hasSME2() && Subtarget->isStreaming() && OpVT.getVectorElementType() != MVT::i1) { |