Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp                     |  38
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp                   | 170
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h                     |  10
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp                      | 270
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.h                        |   4
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td                       |  69
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.td                    |   3
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp               |  23
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64StackTagging.cpp                   |   6
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp               |   2
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp            |   2
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h              |   2
-rw-r--r-- | llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp       |   3
-rw-r--r-- | llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp            |   2
-rw-r--r-- | llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp  |   3
15 files changed, 266 insertions(+), 341 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index c4b43e1..c52487a 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -176,6 +176,9 @@ public: std::optional<AArch64PACKey::ID> PACKey, uint64_t PACDisc, Register PACAddrDisc); + // Emit the sequence for PAC. + void emitPtrauthSign(const MachineInstr *MI); + // Emit the sequence to compute the discriminator. // // The returned register is either unmodified AddrDisc or ScratchReg. @@ -2175,6 +2178,37 @@ void AArch64AsmPrinter::emitPtrauthAuthResign( OutStreamer->emitLabel(EndSym); } +void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) { + Register Val = MI->getOperand(1).getReg(); + auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm(); + uint64_t Disc = MI->getOperand(3).getImm(); + Register AddrDisc = MI->getOperand(4).getReg(); + bool AddrDiscKilled = MI->getOperand(4).isKill(); + + // As long as at least one of Val and AddrDisc is in GPR64noip, a scratch + // register is available. + Register ScratchReg = Val == AArch64::X16 ? AArch64::X17 : AArch64::X16; + assert(ScratchReg != AddrDisc && + "Neither X16 nor X17 is available as a scratch register"); + + // Compute pac discriminator + assert(isUInt<16>(Disc)); + Register DiscReg = emitPtrauthDiscriminator( + Disc, AddrDisc, ScratchReg, /*MayUseAddrAsScratch=*/AddrDiscKilled); + bool IsZeroDisc = DiscReg == AArch64::XZR; + unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc); + + // paciza x16 ; if IsZeroDisc + // pacia x16, x17 ; if !IsZeroDisc + MCInst PACInst; + PACInst.setOpcode(Opc); + PACInst.addOperand(MCOperand::createReg(Val)); + PACInst.addOperand(MCOperand::createReg(Val)); + if (!IsZeroDisc) + PACInst.addOperand(MCOperand::createReg(DiscReg)); + EmitToStreamer(*OutStreamer, PACInst); +} + void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) { bool IsCall = MI->getOpcode() == AArch64::BLRA; unsigned BrTarget = MI->getOperand(0).getReg(); @@ -2890,6 +2924,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { MI->getOperand(4).getImm(), MI->getOperand(5).getReg()); return; + case AArch64::PAC: + emitPtrauthSign(MI); + return; + case AArch64::LOADauthptrstatic: LowerLOADauthptrstatic(*MI); return; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f026726..7b49754 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -164,6 +164,9 @@ static cl::opt<bool> UseFEATCPACodegen( /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; +/// Value type used for NZCV flags. +static constexpr MVT FlagsVT = MVT::i32; + static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7}; @@ -3098,6 +3101,83 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI, return BB; } +// Helper function to find the instruction that defined a virtual register. +// If unable to find such instruction, returns nullptr. +static const MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI, + Register Reg) { + while (Reg.isVirtual()) { + MachineInstr *DefMI = MRI.getVRegDef(Reg); + assert(DefMI && "Virtual register definition not found"); + unsigned Opcode = DefMI->getOpcode(); + + if (Opcode == AArch64::COPY) { + Reg = DefMI->getOperand(1).getReg(); + // Vreg is defined by copying from physreg. 
+ if (Reg.isPhysical()) + return DefMI; + continue; + } + if (Opcode == AArch64::SUBREG_TO_REG) { + Reg = DefMI->getOperand(2).getReg(); + continue; + } + + return DefMI; + } + return nullptr; +} + +void AArch64TargetLowering::fixupPtrauthDiscriminator( + MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp, + MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + + Register AddrDisc = AddrDiscOp.getReg(); + int64_t IntDisc = IntDiscOp.getImm(); + assert(IntDisc == 0 && "Blend components are already expanded"); + + const MachineInstr *DiscMI = stripVRegCopies(MRI, AddrDisc); + if (DiscMI) { + switch (DiscMI->getOpcode()) { + case AArch64::MOVKXi: + // blend(addr, imm) which is lowered as "MOVK addr, #imm, #48". + // #imm should be an immediate and not a global symbol, for example. + if (DiscMI->getOperand(2).isImm() && + DiscMI->getOperand(3).getImm() == 48) { + AddrDisc = DiscMI->getOperand(1).getReg(); + IntDisc = DiscMI->getOperand(2).getImm(); + } + break; + case AArch64::MOVi32imm: + case AArch64::MOVi64imm: + // Small immediate integer constant passed via VReg. + if (DiscMI->getOperand(1).isImm() && + isUInt<16>(DiscMI->getOperand(1).getImm())) { + AddrDisc = AArch64::NoRegister; + IntDisc = DiscMI->getOperand(1).getImm(); + } + break; + } + } + + // For uniformity, always use NoRegister, as XZR is not necessarily contained + // in the requested register class. + if (AddrDisc == AArch64::XZR) + AddrDisc = AArch64::NoRegister; + + // Make sure AddrDisc operand respects the register class imposed by MI. + if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) { + Register TmpReg = MRI.createVirtualRegister(AddrDiscRC); + BuildMI(*BB, MI, DL, TII->get(AArch64::COPY), TmpReg).addReg(AddrDisc); + AddrDisc = TmpReg; + } + + AddrDiscOp.setReg(AddrDisc); + IntDiscOp.setImm(IntDisc); +} + MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { @@ -3196,6 +3276,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true); case AArch64::MOVT_TIZ_PSEUDO: return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true); + + case AArch64::PAC: + fixupPtrauthDiscriminator(MI, BB, MI.getOperand(3), MI.getOperand(4), + &AArch64::GPR64noipRegClass); + return BB; } } @@ -3451,7 +3536,7 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL, } unsigned Opcode = IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP; - return DAG.getNode(Opcode, DL, {MVT::i32, MVT::Other}, {Chain, LHS, RHS}); + return DAG.getNode(Opcode, DL, {FlagsVT, MVT::Other}, {Chain, LHS, RHS}); } static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, @@ -3465,7 +3550,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS); } - return DAG.getNode(AArch64ISD::FCMP, DL, MVT::i32, LHS, RHS); + return DAG.getNode(AArch64ISD::FCMP, DL, FlagsVT, LHS, RHS); } // The CMP instruction is just an alias for SUBS, and representing it as @@ -3490,7 +3575,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // (a.k.a. ANDS) except that the flags are only guaranteed to work for one // of the signed comparisons. 
const SDValue ANDSNode = - DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, MVT_CC), + DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, FlagsVT), LHS.getOperand(0), LHS.getOperand(1)); // Replace all users of (and X, Y) with newly generated (ands X, Y) DAG.ReplaceAllUsesWith(LHS, ANDSNode); @@ -3501,7 +3586,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } - return DAG.getNode(Opcode, DL, DAG.getVTList(VT, MVT_CC), LHS, RHS) + return DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS) .getValue(1); } @@ -3597,7 +3682,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); - return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); + return DAG.getNode(Opcode, DL, FlagsVT, LHS, RHS, NZCVOp, Condition, CCOp); } /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be @@ -4036,7 +4121,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); // Check that the result fits into a 32-bit integer. - SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC); + SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT); if (IsSigned) { // cmp xreg, wreg, sxtw SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value); @@ -4059,12 +4144,12 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { DAG.getConstant(63, DL, MVT::i64)); // It is important that LowerBits is last, otherwise the arithmetic // shift will not be folded into the compare (SUBS). - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); + SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); + SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), @@ -4075,7 +4160,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { } // switch (...) if (Opc) { - SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); + SDVTList VTs = DAG.getVTList(Op->getValueType(0), FlagsVT); // Emit the AArch64 operation with overflow check. Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); @@ -4177,7 +4262,7 @@ static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) { SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value; SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT); SDValue Cmp = - DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1); + DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Op0, Op1); return Cmp.getValue(1); } @@ -4220,16 +4305,15 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry); SDLoc DL(Op); - SDVTList VTs = DAG.getVTList(VT0, VT1); - SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS, + SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, FlagsVT), OpLHS, OpRHS, OpCarryIn); SDValue OutFlag = IsSigned ? 
overflowFlagToValue(Sum.getValue(1), VT1, DAG) : carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry); - return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag); + return DAG.getMergeValues({Sum, OutFlag}, DL); } static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { @@ -4254,8 +4338,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { Overflow = DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Value, Overflow); + return DAG.getMergeValues({Value, Overflow}, DL); } // Prefetch operands are: @@ -6813,7 +6896,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64)); SDValue Result = DAG.getMemIntrinsicNode( AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other), - {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, + {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo), + DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()}, StoreNode->getMemoryVT(), StoreNode->getMemOperand()); return Result; } @@ -7037,9 +7121,8 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(0)); // Generate SUBS & CSEL. - SDValue Cmp = - DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - Op.getOperand(0), DAG.getConstant(0, DL, VT)); + SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), + Op.getOperand(0), DAG.getConstant(0, DL, VT)); return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, DAG.getConstant(AArch64CC::PL, DL, MVT::i32), Cmp.getValue(1)); @@ -11108,7 +11191,7 @@ SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op, SDValue Carry = Op.getOperand(2); // SBCS uses a carry not a borrow so the carry flag should be inverted first. SDValue InvCarry = valueToCarryFlag(Carry, DAG, true); - SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, MVT::Glue), + SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS, InvCarry); EVT OpVT = Op.getValueType(); @@ -12441,10 +12524,10 @@ SDValue AArch64TargetLowering::LowerAsmOutputForConstraint( // Get NZCV register. Only update chain when copyfrom is glued. if (Glue.getNode()) { - Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue); + Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, FlagsVT, Glue); Chain = Glue.getValue(1); } else - Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32); + Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, FlagsVT); // Extract CC code. 
SDValue CC = getSETCC(Cond, Glue, DL, DAG); @@ -17343,12 +17426,17 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) { /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35> /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19> /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr) -bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, +bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, + Value *LaneMask, ShuffleVectorInst *SVI, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); + auto *SI = dyn_cast<StoreInst>(Store); + if (!SI) + return false; + assert(!LaneMask && "Unexpected mask on store"); auto *VecTy = cast<FixedVectorType>(SVI->getType()); assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store"); @@ -18015,11 +18103,14 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( unsigned ShlAmt = C2->getZExtValue(); if (auto ShouldADD = *N->user_begin(); ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) { - if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) { - unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8; - if ((1ULL << ShlAmt) == ByteVT && - isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT())) - return false; + if (auto Load = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) { + EVT MemVT = Load->getMemoryVT(); + + if (Load->getValueType(0).isScalableVector()) + return (8ULL << ShlAmt) != MemVT.getScalarSizeInBits(); + + if (isIndexedLoadLegal(ISD::PRE_INC, MemVT)) + return (8ULL << ShlAmt) != MemVT.getFixedSizeInBits(); } } } @@ -18588,7 +18679,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor, Created.push_back(And.getNode()); } else { SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC); - SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDVTList VTs = DAG.getVTList(VT, FlagsVT); SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0); SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne); @@ -19477,10 +19568,10 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) { // can select to CCMN to avoid the extra mov SDValue AbsOp1 = DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0)); - CCmp = DAG.getNode(AArch64ISD::CCMN, DL, MVT_CC, Cmp1.getOperand(0), AbsOp1, - NZCVOp, Condition, Cmp0); + CCmp = DAG.getNode(AArch64ISD::CCMN, DL, FlagsVT, Cmp1.getOperand(0), + AbsOp1, NZCVOp, Condition, Cmp0); } else { - CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0), + CCmp = DAG.getNode(AArch64ISD::CCMP, DL, FlagsVT, Cmp1.getOperand(0), Cmp1.getOperand(1), NZCVOp, Condition, Cmp0); } return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0), @@ -25129,8 +25220,9 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) { if (!TReassocOp && !FReassocOp) return SDValue(); - SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode), - DAG.getVTList(VT, MVT_CC), CmpOpOther, SubsOp); + SDValue NewCmp = + DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode), + DAG.getVTList(VT, FlagsVT), CmpOpOther, SubsOp); auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) { if (!ReassocOp) @@ -27156,7 +27248,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, : AArch64SysReg::RNDRRS); SDLoc DL(N); SDValue A = DAG.getNode( - AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::i32, MVT::Other), + AArch64ISD::MRS, 
DL, DAG.getVTList(MVT::i64, FlagsVT, MVT::Other), N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32)); SDValue B = DAG.getNode( AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32), @@ -27902,16 +27994,16 @@ void AArch64TargetLowering::ReplaceNodeResults( MemVT.getScalarSizeInBits() == 32u || MemVT.getScalarSizeInBits() == 64u)) { + EVT HalfVT = MemVT.getHalfNumVectorElementsVT(*DAG.getContext()); SDValue Result = DAG.getMemIntrinsicNode( AArch64ISD::LDNP, SDLoc(N), - DAG.getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), - MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), - MVT::Other}), + DAG.getVTList({MVT::v2i64, MVT::v2i64, MVT::Other}), {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(), LoadNode->getMemOperand()); SDValue Pair = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), MemVT, - Result.getValue(0), Result.getValue(1)); + DAG.getBitcast(HalfVT, Result.getValue(0)), + DAG.getBitcast(HalfVT, Result.getValue(1))); Results.append({Pair, Result.getValue(2) /* Chain */}); return; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 713793e..95d0e3b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -182,6 +182,13 @@ public: MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI, MachineBasicBlock *BB) const; + /// Replace (0, vreg) discriminator components with the operands of blend + /// or with (immediate, NoRegister) when possible. + void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB, + MachineOperand &IntDiscOp, + MachineOperand &AddrDiscOp, + const TargetRegisterClass *AddrDiscRC) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -215,7 +222,8 @@ public: ArrayRef<ShuffleVectorInst *> Shuffles, ArrayRef<unsigned> Indices, unsigned Factor) const override; - bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + bool lowerInterleavedStore(Instruction *Store, Value *Mask, + ShuffleVectorInst *SVI, unsigned Factor) const override; bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index bc57537..8685d7a0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -20,7 +20,6 @@ #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CFIInstBuilder.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -36,7 +35,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackMaps.h" -#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -533,8 +531,9 @@ bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, MBP.LHS = LastInst->getOperand(0); MBP.RHS = MachineOperand::CreateImm(0); - MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE - : MachineBranchPredicate::PRED_EQ; + MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW) + ? 
MachineBranchPredicate::PRED_NE + : MachineBranchPredicate::PRED_EQ; return false; } @@ -7353,9 +7352,6 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const { case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2: case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1: case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2: - case AArch64MachineCombinerPattern::GATHER_LANE_i32: - case AArch64MachineCombinerPattern::GATHER_LANE_i16: - case AArch64MachineCombinerPattern::GATHER_LANE_i8: return true; } // end switch (Pattern) return false; @@ -7396,252 +7392,11 @@ static bool getMiscPatterns(MachineInstr &Root, return false; } -static bool getGatherPattern(MachineInstr &Root, - SmallVectorImpl<unsigned> &Patterns, - unsigned LoadLaneOpCode, unsigned NumLanes) { - const MachineFunction *MF = Root.getMF(); - - // Early exit if optimizing for size. - if (MF->getFunction().hasMinSize()) - return false; - - const MachineRegisterInfo &MRI = MF->getRegInfo(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - - // The root of the pattern must load into the last lane of the vector. - if (Root.getOperand(2).getImm() != NumLanes - 1) - return false; - - // Check that we have load into all lanes except lane 0. - // For each load we also want to check that: - // 1. It has a single non-debug use (since we will be replacing the virtual - // register) - // 2. That the addressing mode only uses a single offset register. - auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - auto Range = llvm::seq<unsigned>(1, NumLanes - 1); - SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end()); - while (!RemainingLanes.empty() && CurrInstr && - CurrInstr->getOpcode() == LoadLaneOpCode && - MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) && - CurrInstr->getNumOperands() == 4) { - RemainingLanes.erase(CurrInstr->getOperand(2).getImm()); - CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg()); - } - - if (!RemainingLanes.empty()) - return false; - - // Match the SUBREG_TO_REG sequence. - if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG) - return false; - - // Verify that the subreg to reg loads an integer into the first lane. - auto Lane0LoadReg = CurrInstr->getOperand(2).getReg(); - unsigned SingleLaneSizeInBits = 128 / NumLanes; - if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits) - return false; - - // Verify that it also has a single non debug use. - if (!MRI.hasOneNonDBGUse(Lane0LoadReg)) - return false; - - switch (NumLanes) { - case 4: - Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32); - break; - case 8: - Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16); - break; - case 16: - Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8); - break; - default: - llvm_unreachable("Got bad number of lanes for gather pattern."); - } - - return true; -} - -/// Search for patterns where we use LD1 instructions to load into -/// separate lanes of an 128 bit Neon register. We can increase Memory Level -/// Parallelism by loading into 2 Neon registers instead. -static bool getLoadPatterns(MachineInstr &Root, - SmallVectorImpl<unsigned> &Patterns) { - - // The pattern searches for loads into single lanes. 
- switch (Root.getOpcode()) { - case AArch64::LD1i32: - return getGatherPattern(Root, Patterns, Root.getOpcode(), 4); - case AArch64::LD1i16: - return getGatherPattern(Root, Patterns, Root.getOpcode(), 8); - case AArch64::LD1i8: - return getGatherPattern(Root, Patterns, Root.getOpcode(), 16); - default: - return false; - } -} - -static void -generateGatherPattern(MachineInstr &Root, - SmallVectorImpl<MachineInstr *> &InsInstrs, - SmallVectorImpl<MachineInstr *> &DelInstrs, - DenseMap<Register, unsigned> &InstrIdxForVirtReg, - unsigned Pattern, unsigned NumLanes) { - - MachineFunction &MF = *Root.getParent()->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - - // Gather the initial load instructions to build the pattern - SmallVector<MachineInstr *, 16> LoadToLaneInstrs; - MachineInstr *CurrInstr = &Root; - for (unsigned i = 0; i < NumLanes - 1; ++i) { - LoadToLaneInstrs.push_back(CurrInstr); - CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg()); - } - - // Sort the load instructions according to the lane. - llvm::sort(LoadToLaneInstrs, - [](const MachineInstr *A, const MachineInstr *B) { - return A->getOperand(2).getImm() > B->getOperand(2).getImm(); - }); - - MachineInstr *SubregToReg = CurrInstr; - LoadToLaneInstrs.push_back( - MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg())); - auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs); - - const TargetRegisterClass *FPR128RegClass = - MRI.getRegClass(Root.getOperand(0).getReg()); - - auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr, - Register SrcRegister, unsigned Lane, - Register OffsetRegister) { - auto NewRegister = MRI.createVirtualRegister(FPR128RegClass); - MachineInstrBuilder LoadIndexIntoRegister = - BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()), - NewRegister) - .addReg(SrcRegister) - .addImm(Lane) - .addReg(OffsetRegister, getKillRegState(true)); - InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size())); - InsInstrs.push_back(LoadIndexIntoRegister); - return NewRegister; - }; - - // Helper to create load instruction based on opcode - auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg, - Register OffsetReg) -> MachineInstrBuilder { - unsigned Opcode; - switch (NumLanes) { - case 4: - Opcode = AArch64::LDRSui; - break; - case 8: - Opcode = AArch64::LDRHui; - break; - case 16: - Opcode = AArch64::LDRBui; - break; - default: - llvm_unreachable( - "Got unsupported number of lanes in machine-combiner gather pattern"); - } - // Immediate offset load - return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg) - .addReg(OffsetReg) - .addImm(0); // immediate offset - }; - - // Load the remaining lanes into register 0. - auto LanesToLoadToReg0 = - llvm::make_range(LoadToLaneInstrsAscending.begin() + 1, - LoadToLaneInstrsAscending.begin() + NumLanes / 2); - auto PrevReg = SubregToReg->getOperand(0).getReg(); - for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) { - PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1, - LoadInstr->getOperand(3).getReg()); - DelInstrs.push_back(LoadInstr); - } - auto LastLoadReg0 = PrevReg; - - // First load into register 1. Perform a LDRSui to zero out the upper lanes in - // a single instruction. 
- auto Lane0Load = *LoadToLaneInstrsAscending.begin(); - auto OriginalSplitLoad = - *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2); - auto DestRegForMiddleIndex = MRI.createVirtualRegister( - MRI.getRegClass(Lane0Load->getOperand(0).getReg())); - - MachineInstrBuilder MiddleIndexLoadInstr = - CreateLoadInstruction(NumLanes, DestRegForMiddleIndex, - OriginalSplitLoad->getOperand(3).getReg()); - - InstrIdxForVirtReg.insert( - std::make_pair(DestRegForMiddleIndex, InsInstrs.size())); - InsInstrs.push_back(MiddleIndexLoadInstr); - DelInstrs.push_back(OriginalSplitLoad); - - // Subreg To Reg instruction for register 1. - auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass); - unsigned SubregType; - switch (NumLanes) { - case 4: - SubregType = AArch64::ssub; - break; - case 8: - SubregType = AArch64::hsub; - break; - case 16: - SubregType = AArch64::bsub; - break; - default: - llvm_unreachable( - "Got invalid NumLanes for machine-combiner gather pattern"); - } - - auto SubRegToRegInstr = - BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()), - DestRegForSubregToReg) - .addImm(0) - .addReg(DestRegForMiddleIndex, getKillRegState(true)) - .addImm(SubregType); - InstrIdxForVirtReg.insert( - std::make_pair(DestRegForSubregToReg, InsInstrs.size())); - InsInstrs.push_back(SubRegToRegInstr); - - // Load remaining lanes into register 1. - auto LanesToLoadToReg1 = - llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1, - LoadToLaneInstrsAscending.end()); - PrevReg = SubRegToRegInstr->getOperand(0).getReg(); - for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) { - PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1, - LoadInstr->getOperand(3).getReg()); - if (Index == NumLanes / 2 - 2) { - break; - } - DelInstrs.push_back(LoadInstr); - } - auto LastLoadReg1 = PrevReg; - - // Create the final zip instruction to combine the results. 
- MachineInstrBuilder ZipInstr = - BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64), - Root.getOperand(0).getReg()) - .addReg(LastLoadReg0) - .addReg(LastLoadReg1); - InsInstrs.push_back(ZipInstr); -} - CombinerObjective AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const { switch (Pattern) { case AArch64MachineCombinerPattern::SUBADD_OP1: case AArch64MachineCombinerPattern::SUBADD_OP2: - case AArch64MachineCombinerPattern::GATHER_LANE_i32: - case AArch64MachineCombinerPattern::GATHER_LANE_i16: - case AArch64MachineCombinerPattern::GATHER_LANE_i8: return CombinerObjective::MustReduceDepth; default: return TargetInstrInfo::getCombinerObjective(Pattern); @@ -7671,10 +7426,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( if (getMiscPatterns(Root, Patterns)) return true; - // Load patterns - if (getLoadPatterns(Root, Patterns)) - return true; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, DoRegPressureReduce); } @@ -8930,21 +8681,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs); break; } - case AArch64MachineCombinerPattern::GATHER_LANE_i32: { - generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, - Pattern, 4); - break; - } - case AArch64MachineCombinerPattern::GATHER_LANE_i16: { - generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, - Pattern, 8); - break; - } - case AArch64MachineCombinerPattern::GATHER_LANE_i8: { - generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg, - Pattern, 16); - break; - } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 02734866..7c255da 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -172,10 +172,6 @@ enum AArch64MachineCombinerPattern : unsigned { FMULv8i16_indexed_OP2, FNMADD, - - GATHER_LANE_i32, - GATHER_LANE_i16, - GATHER_LANE_i8 }; class AArch64InstrInfo final : public AArch64GenInstrInfo { const AArch64RegisterInfo RI; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 9f8a257..07cacfa 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -430,26 +430,27 @@ def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">; def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; + SDTCisInt<0>, + SDTCisVT<1, FlagsVT>]>; // SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, - SDTCisVT<3, i32>]>; + SDTCisVT<3, FlagsVT>]>; // SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, - SDTCisVT<1, i32>, - SDTCisVT<4, i32>]>; + SDTCisVT<1, FlagsVT>, + SDTCisVT<4, FlagsVT>]>; def SDT_AArch64Brcond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; + SDTCisVT<2, FlagsVT>]>; def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>]>; @@ -458,22 +459,22 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, - SDTCisVT<4, i32>]>; + 
SDTCisVT<4, FlagsVT>]>; def SDT_AArch64CCMP : SDTypeProfile<1, 5, - [SDTCisVT<0, i32>, + [SDTCisVT<0, FlagsVT>, SDTCisInt<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>, SDTCisInt<4>, SDTCisVT<5, i32>]>; def SDT_AArch64FCCMP : SDTypeProfile<1, 5, - [SDTCisVT<0, i32>, + [SDTCisVT<0, FlagsVT>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>, SDTCisInt<4>, SDTCisVT<5, i32>]>; -def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, +def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, FlagsVT>, SDTCisFP<1>, SDTCisSameAs<2, 1>]>; def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>; @@ -518,10 +519,10 @@ def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; +def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; +def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; // Generates the general dynamic sequences, i.e. // adrp x0, :tlsdesc:var @@ -1124,10 +1125,10 @@ def AArch64probedalloca SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPMayStore]>; -// MRS, also sets the flags via a glue. +// MRS, also sets the flags. def AArch64mrs : SDNode<"AArch64ISD::MRS", SDTypeProfile<2, 1, [SDTCisVT<0, i64>, - SDTCisVT<1, i32>, + SDTCisVT<1, FlagsVT>, SDTCisVT<2, i32>]>, [SDNPHasChain]>; @@ -2032,7 +2033,7 @@ let Predicates = [HasPAuth] in { def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; } - defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; + defm PAC : SignAuth<0b000, 0b010, "pac", null_frag>; defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; def XPACI : ClearAuth<0, "xpaci">; @@ -2152,6 +2153,26 @@ let Predicates = [HasPAuth] in { let Uses = []; } + // PAC pseudo instruction. In AsmPrinter, it is expanded into an actual PAC* + // instruction immediately preceded by the discriminator computation. + // This enforces the expected immediate modifier is used for signing, even + // if an attacker is able to substitute AddrDisc. + def PAC : Pseudo<(outs GPR64:$SignedVal), + (ins GPR64:$Val, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc), + [], "$SignedVal = $Val">, Sched<[WriteI, ReadI]> { + let isCodeGenOnly = 1; + let hasSideEffects = 0; + let mayStore = 0; + let mayLoad = 0; + let Size = 12; + let Defs = [X16, X17]; + let usesCustomInserter = 1; + } + + // A standalone pattern is used, so that literal 0 can be passed as $Disc. + def : Pat<(int_ptrauth_sign GPR64:$Val, timm:$Key, GPR64noip:$AddrDisc), + (PAC GPR64:$Val, $Key, 0, GPR64noip:$AddrDisc)>; + // AUT and re-PAC a value, using different keys/data. // This directly manipulates x16/x17, which are the only registers that // certain OSs guarantee are safe to use for sensitive operations. 
@@ -3934,6 +3955,26 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; +// load zero-extended i32, bitcast to f64 +def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; + +// load zero-extended i16, bitcast to f64 +def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; + +// load zero-extended i8, bitcast to f64 +def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; + +// load zero-extended i16, bitcast to f32 +def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; + +// load zero-extended i8, bitcast to f32 +def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))), + (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; + // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", [(AArch64Prefetch timm:$Rt, diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 61bf87f..1a7609b 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -305,7 +305,8 @@ def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">; def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">; // Condition code regclass. -def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { +defvar FlagsVT = i32; +def CCR : RegisterClass<"AArch64", [FlagsVT], 32, (add NZCV)> { let CopyCost = -1; // Don't allow copying of status registers. // CCR is not allocatable. diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index bafb8d0..8a5b5ba 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -32,10 +32,29 @@ AArch64SelectionDAGInfo::AArch64SelectionDAGInfo() void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const { + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); + #ifndef NDEBUG + // Some additional checks not yet implemented by verifyTargetNode. 
+ constexpr MVT FlagsVT = MVT::i32; switch (N->getOpcode()) { - default: - return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); + case AArch64ISD::SUBS: + assert(N->getValueType(1) == FlagsVT); + break; + case AArch64ISD::ADC: + case AArch64ISD::SBC: + assert(N->getOperand(2).getValueType() == FlagsVT); + break; + case AArch64ISD::ADCS: + case AArch64ISD::SBCS: + assert(N->getValueType(1) == FlagsVT); + assert(N->getOperand(2).getValueType() == FlagsVT); + break; + case AArch64ISD::CSEL: + case AArch64ISD::CSINC: + case AArch64ISD::BRCOND: + assert(N->getOperand(3).getValueType() == FlagsVT); + break; case AArch64ISD::SADDWT: case AArch64ISD::SADDWB: case AArch64ISD::UADDWT: diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp index 75c7dd9..f136a184 100644 --- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp @@ -581,7 +581,6 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { // statement if return_twice functions are called. bool StandardLifetime = !SInfo.CallsReturnTwice && - SInfo.UnrecognizedLifetimes.empty() && memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI, ClMaxLifetimes); if (StandardLifetime) { @@ -616,10 +615,5 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { memtag::annotateDebugRecords(Info, Tag); } - // If we have instrumented at least one alloca, all unrecognized lifetime - // intrinsics have to go. - for (auto *I : SInfo.UnrecognizedLifetimes) - I->eraseFromParent(); - return true; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp index c218831..85de2d5 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -36,7 +36,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, // SHF_AARCH64_PURECODE flag set if the "+execute-only" target feature is // present. if (TM.getMCSubtargetInfo()->hasFeature(AArch64::FeatureExecuteOnly)) { - auto *Text = cast<MCSectionELF>(TextSection); + auto *Text = static_cast<MCSectionELF *>(TextSection); Text->setFlags(Text->getFlags() | ELF::SHF_AARCH64_PURECODE); } } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 90d3d92..40f49da 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -249,7 +249,7 @@ static bool hasPossibleIncompatibleOps(const Function *F) { return false; } -uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const { +APInt AArch64TTIImpl::getFeatureMask(const Function &F) const { StringRef AttributeStr = isMultiversionedFunction(F) ? 
"fmv-features" : "target-features"; StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index b27eb2e..7f45177 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -89,7 +89,7 @@ public: unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override; - uint64_t getFeatureMask(const Function &F) const override; + APInt getFeatureMask(const Function &F) const override; bool isMultiversionedFunction(const Function &F) const override; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 08f547a..6257e99 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -523,7 +523,8 @@ void AArch64TargetELFStreamer::finish() { // mark it execute-only if it is empty and there is at least one // execute-only section in the object. if (any_of(Asm, [](const MCSection &Sec) { - return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_AARCH64_PURECODE; + return static_cast<const MCSectionELF &>(Sec).getFlags() & + ELF::SHF_AARCH64_PURECODE; })) { auto *Text = static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection()); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 3d4a14b..1a9bce5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -9,8 +9,6 @@ #include "AArch64MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 1ac340a..a22a17a 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -132,7 +132,8 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section, // But only if they don't point to a few forbidden sections. if (!Symbol.isInSection()) return true; - const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection()); + const MCSectionMachO &RefSec = + static_cast<MCSectionMachO &>(Symbol.getSection()); if (RefSec.getType() == MachO::S_CSTRING_LITERALS) return false; |