Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 66
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 18
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 5
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp | 26
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r-- | llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 8
8 files changed, 89 insertions, 66 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 12fc976..201bfe0 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1205,32 +1205,36 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     Register DstReg = MI.getOperand(0).getReg();
     if (DstReg == MI.getOperand(3).getReg()) {
       // Expand to BIT
-      BuildMI(MBB, MBBI, MI.getDebugLoc(),
-              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
-                                                  : AArch64::BITv16i8))
-          .add(MI.getOperand(0))
-          .add(MI.getOperand(3))
-          .add(MI.getOperand(2))
-          .add(MI.getOperand(1));
+      auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                       TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
+                                                           : AArch64::BITv16i8))
+                   .add(MI.getOperand(0))
+                   .add(MI.getOperand(3))
+                   .add(MI.getOperand(2))
+                   .add(MI.getOperand(1));
+      transferImpOps(MI, I, I);
     } else if (DstReg == MI.getOperand(2).getReg()) {
       // Expand to BIF
-      BuildMI(MBB, MBBI, MI.getDebugLoc(),
-              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
-                                                  : AArch64::BIFv16i8))
-          .add(MI.getOperand(0))
-          .add(MI.getOperand(2))
-          .add(MI.getOperand(3))
-          .add(MI.getOperand(1));
+      auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                       TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
+                                                           : AArch64::BIFv16i8))
+                   .add(MI.getOperand(0))
+                   .add(MI.getOperand(2))
+                   .add(MI.getOperand(3))
+                   .add(MI.getOperand(1));
+      transferImpOps(MI, I, I);
     } else {
       // Expand to BSL, use additional move if required
       if (DstReg == MI.getOperand(1).getReg()) {
-        BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
-                                                    : AArch64::BSLv16i8))
-            .add(MI.getOperand(0))
-            .add(MI.getOperand(1))
-            .add(MI.getOperand(2))
-            .add(MI.getOperand(3));
+        auto I =
+            BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                    TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
+                                                        : AArch64::BSLv16i8))
+                .add(MI.getOperand(0))
+                .add(MI.getOperand(1))
+                .add(MI.getOperand(2))
+                .add(MI.getOperand(3));
+        transferImpOps(MI, I, I);
       } else {
         BuildMI(MBB, MBBI, MI.getDebugLoc(),
                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
@@ -1240,15 +1244,17 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                     getRenamableRegState(MI.getOperand(0).isRenamable()))
             .add(MI.getOperand(1))
             .add(MI.getOperand(1));
-        BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
-                                                    : AArch64::BSLv16i8))
-            .add(MI.getOperand(0))
-            .addReg(DstReg,
-                    RegState::Kill |
-                        getRenamableRegState(MI.getOperand(0).isRenamable()))
-            .add(MI.getOperand(2))
-            .add(MI.getOperand(3));
+        auto I2 =
+            BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                    TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
+                                                        : AArch64::BSLv16i8))
+                .add(MI.getOperand(0))
+                .addReg(DstReg,
+                        RegState::Kill | getRenamableRegState(
+                                             MI.getOperand(0).isRenamable()))
+                .add(MI.getOperand(2))
+                .add(MI.getOperand(3));
+        transferImpOps(MI, I2, I2);
       }
     }
     MI.eraseFromParent();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ff23f76..02ee517 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6439,7 +6439,9 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
     }
   }
 
-  return true;
+  EVT PreExtScalarVT = ExtVal->getOperand(0).getValueType().getScalarType();
+  return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
+         PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
 }
 
 unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -17155,7 +17157,7 @@ static Function *getStructuredStoreFunction(Module *M, unsigned Factor,
 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
 bool AArch64TargetLowering::lowerInterleavedLoad(
-    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
     ArrayRef<unsigned> Indices, unsigned Factor) const {
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");
@@ -17163,6 +17165,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
   assert(Shuffles.size() == Indices.size() &&
          "Unmatched number of shufflevectors and indices");
 
+  auto *LI = dyn_cast<LoadInst>(Load);
+  if (!LI)
+    return false;
+  assert(!Mask && "Unexpected mask on a load");
+
   const DataLayout &DL = LI->getDataLayout();
 
   VectorType *VTy = Shuffles[0]->getType();
@@ -17336,12 +17343,17 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
-bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
+                                                  Value *LaneMask,
                                                   ShuffleVectorInst *SVI,
                                                   unsigned Factor) const {
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");
 
+  auto *SI = dyn_cast<StoreInst>(Store);
+  if (!SI)
+    return false;
+  assert(!LaneMask && "Unexpected mask on store");
   auto *VecTy = cast<FixedVectorType>(SVI->getType());
   assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 7b1de3d..d8403c2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -211,11 +211,12 @@ public:
 
   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
 
-  bool lowerInterleavedLoad(LoadInst *LI,
+  bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
                             ArrayRef<ShuffleVectorInst *> Shuffles,
                             ArrayRef<unsigned> Indices,
                             unsigned Factor) const override;
-  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+  bool lowerInterleavedStore(Instruction *Store, Value *Mask,
+                             ShuffleVectorInst *SVI,
                              unsigned Factor) const override;
 
   bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6c46b18..9f8a257 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1053,13 +1053,6 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
 def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;
 
-def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
-                           [(abdu node:$lhs, node:$rhs),
-                            (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
-def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
-                           [(abds node:$lhs, node:$rhs),
-                            (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
-
 // Add Pairwise of two vectors
 def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
 // Add Long Pairwise
@@ -5667,8 +5660,7 @@ let Predicates = [HasFullFP16] in {
 // Advanced SIMD two vector instructions.
 //===----------------------------------------------------------------------===//
 
-defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
-             AArch64uabd>;
+defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", abdu>;
 // Match UABDL in log2-shuffle patterns.
 def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                            (zext (v8i8 V64:$opB))))),
@@ -6018,8 +6010,8 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
 defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
 defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
 defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
-      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
+      TriOpFrag<(add node:$LHS, (abds node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", abds>;
 defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
 defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
 defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
@@ -6037,8 +6029,8 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
 defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
 defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
 defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
-      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
+      TriOpFrag<(add node:$LHS, (abdu node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", abdu>;
 defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
 defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
 defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
@@ -6759,10 +6751,8 @@ defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>
 defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
 defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
 defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
-defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
-             AArch64sabd>;
-defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
-             AArch64sabd>;
+defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>;
+defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>;
 defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
              BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
 defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
@@ -6780,8 +6770,7 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
              BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
 defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
              BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
-defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
-             AArch64uabd>;
+defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", abdu>;
 defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
              BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
 defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddd17c..abcd550 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -8,8 +8,8 @@
 //
 // This pass performs below peephole optimizations on MIR level.
 //
-// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
+//    MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
 //
 // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
 //    MOVi64imm + ADDXrr ==> ANDXri + ANDXri
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
 
   template <typename T>
-  bool visitAND(unsigned Opc, MachineInstr &MI);
+  bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
   bool visitORR(MachineInstr &MI);
   bool visitCSEL(MachineInstr &MI);
   bool visitINSERT(MachineInstr &MI);
@@ -194,12 +194,12 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
 }
 
 template <typename T>
-bool AArch64MIPeepholeOpt::visitAND(
-    unsigned Opc, MachineInstr &MI) {
+bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI,
+                                    unsigned OtherOpc) {
   // Try below transformation.
   //
-  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
-  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+  // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
+  // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
   //
   // The mov pseudo instruction could be expanded to multiple mov instructions
   // later. Let's try to split the constant operand of mov instruction into two
@@ -208,10 +208,10 @@ bool AArch64MIPeepholeOpt::visitAND(
 
   return splitTwoPartImm<T>(
       MI,
-      [Opc](T Imm, unsigned RegSize, T &Imm0,
-            T &Imm1) -> std::optional<OpcodePair> {
+      [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
+                      T &Imm1) -> std::optional<OpcodePair> {
         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
-          return std::make_pair(Opc, Opc);
+          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
         return std::nullopt;
       },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
@@ -864,6 +864,12 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       case AArch64::ANDXrr:
         Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
+      case AArch64::ANDSWrr:
+        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
+        break;
+      case AArch64::ANDSXrr:
+        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
+        break;
       case AArch64::ORRWrs:
         Changed |= visitORR(MI);
         break;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 2409cc8..0f4f012 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -534,7 +534,7 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(
 }
 
 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
-                                           unsigned NumRegionInstrs) const {
+                                           const SchedRegion &Region) const {
   // LNT run (at least on Cyclone) showed reasonably significant gains for
   // bi-directional scheduling. 253.perlbmk.
   Policy.OnlyTopDown = false;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 154db3c..061ed61 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -343,7 +343,8 @@ public:
   }
 
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
-                           unsigned NumRegionInstrs) const override;
+                           const SchedRegion &Region) const override;
+
   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                              SDep &Dep,
                              const TargetSchedModel *SchedModel) const override;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 473ba5e..bb0f667b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -287,6 +287,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .moreElementsToNextPow2(0)
       .lower();
 
+  getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+      .lower();
+
   getActionDefinitionsBuilder(
       {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
       .legalFor({{s32, s32}, {s64, s32}})
@@ -1794,6 +1798,10 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     return LowerBinOp(AArch64::G_SMULL);
   case Intrinsic::aarch64_neon_umull:
     return LowerBinOp(AArch64::G_UMULL);
+  case Intrinsic::aarch64_neon_sabd:
+    return LowerBinOp(TargetOpcode::G_ABDS);
+  case Intrinsic::aarch64_neon_uabd:
+    return LowerBinOp(TargetOpcode::G_ABDU);
  case Intrinsic::aarch64_neon_abs: {
     // Lower the intrinsic to G_ABS.
     MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
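
A rough illustration of the AArch64MIPeepholeOpt change above (a sketch, not part of the commit): the pass can now split the immediate of a flag-setting AND as well. The C++ snippet below is a hypothetical example of source that tends to produce the MOVi64imm + ANDSXrr sequence the peephole targets; the mask value and the expected ANDXri + ANDSXri rewrite are assumptions based on the comment in the diff, since the transformation only applies when the constant is not itself a valid logical immediate but decomposes into two that are.

// Hypothetical example (not from the commit): a flag-setting AND against a
// constant that is not encodable as a single AArch64 logical immediate.
// With the peephole extended to ANDS?Xrr, the constant may be split into
// ANDXri + ANDSXri instead of being materialized with a separate MOV,
// provided it is the AND of two valid bitmask immediates.
bool hasMaskedBits(unsigned long long X) {
  // The comparison lets the final AND become the flag-setting form.
  return (X & 0x00ffff00ffffff00ULL) != 0;
}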