aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp38
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp170
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h10
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp270
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td69
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp23
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp6
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp3
15 files changed, 266 insertions, 341 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index c4b43e1..c52487a 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -176,6 +176,9 @@ public:
std::optional<AArch64PACKey::ID> PACKey,
uint64_t PACDisc, Register PACAddrDisc);
+ // Emit the sequence for PAC.
+ void emitPtrauthSign(const MachineInstr *MI);
+
// Emit the sequence to compute the discriminator.
//
// The returned register is either unmodified AddrDisc or ScratchReg.
@@ -2175,6 +2178,37 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(
OutStreamer->emitLabel(EndSym);
}
+void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) {
+ Register Val = MI->getOperand(1).getReg();
+ auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm();
+ uint64_t Disc = MI->getOperand(3).getImm();
+ Register AddrDisc = MI->getOperand(4).getReg();
+ bool AddrDiscKilled = MI->getOperand(4).isKill();
+
+ // As long as at least one of Val and AddrDisc is in GPR64noip, a scratch
+ // register is available.
+ Register ScratchReg = Val == AArch64::X16 ? AArch64::X17 : AArch64::X16;
+ assert(ScratchReg != AddrDisc &&
+ "Neither X16 nor X17 is available as a scratch register");
+
+ // Compute pac discriminator
+ assert(isUInt<16>(Disc));
+ Register DiscReg = emitPtrauthDiscriminator(
+ Disc, AddrDisc, ScratchReg, /*MayUseAddrAsScratch=*/AddrDiscKilled);
+ bool IsZeroDisc = DiscReg == AArch64::XZR;
+ unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc);
+
+ // paciza x16 ; if IsZeroDisc
+ // pacia x16, x17 ; if !IsZeroDisc
+ MCInst PACInst;
+ PACInst.setOpcode(Opc);
+ PACInst.addOperand(MCOperand::createReg(Val));
+ PACInst.addOperand(MCOperand::createReg(Val));
+ if (!IsZeroDisc)
+ PACInst.addOperand(MCOperand::createReg(DiscReg));
+ EmitToStreamer(*OutStreamer, PACInst);
+}
+
void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
bool IsCall = MI->getOpcode() == AArch64::BLRA;
unsigned BrTarget = MI->getOperand(0).getReg();
@@ -2890,6 +2924,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
MI->getOperand(4).getImm(), MI->getOperand(5).getReg());
return;
+ case AArch64::PAC:
+ emitPtrauthSign(MI);
+ return;
+
case AArch64::LOADauthptrstatic:
LowerLOADauthptrstatic(*MI);
return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f026726..7b49754 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -164,6 +164,9 @@ static cl::opt<bool> UseFEATCPACodegen(
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
+/// Value type used for NZCV flags.
+static constexpr MVT FlagsVT = MVT::i32;
+
static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7};
@@ -3098,6 +3101,83 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
return BB;
}
+// Helper function to find the instruction that defined a virtual register.
+// If unable to find such instruction, returns nullptr.
+static const MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
+ Register Reg) {
+ while (Reg.isVirtual()) {
+ MachineInstr *DefMI = MRI.getVRegDef(Reg);
+ assert(DefMI && "Virtual register definition not found");
+ unsigned Opcode = DefMI->getOpcode();
+
+ if (Opcode == AArch64::COPY) {
+ Reg = DefMI->getOperand(1).getReg();
+ // Vreg is defined by copying from physreg.
+ if (Reg.isPhysical())
+ return DefMI;
+ continue;
+ }
+ if (Opcode == AArch64::SUBREG_TO_REG) {
+ Reg = DefMI->getOperand(2).getReg();
+ continue;
+ }
+
+ return DefMI;
+ }
+ return nullptr;
+}
+
+void AArch64TargetLowering::fixupPtrauthDiscriminator(
+ MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp,
+ MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ Register AddrDisc = AddrDiscOp.getReg();
+ int64_t IntDisc = IntDiscOp.getImm();
+ assert(IntDisc == 0 && "Blend components are already expanded");
+
+ const MachineInstr *DiscMI = stripVRegCopies(MRI, AddrDisc);
+ if (DiscMI) {
+ switch (DiscMI->getOpcode()) {
+ case AArch64::MOVKXi:
+ // blend(addr, imm) which is lowered as "MOVK addr, #imm, #48".
+ // #imm should be an immediate and not a global symbol, for example.
+ if (DiscMI->getOperand(2).isImm() &&
+ DiscMI->getOperand(3).getImm() == 48) {
+ AddrDisc = DiscMI->getOperand(1).getReg();
+ IntDisc = DiscMI->getOperand(2).getImm();
+ }
+ break;
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ // Small immediate integer constant passed via VReg.
+ if (DiscMI->getOperand(1).isImm() &&
+ isUInt<16>(DiscMI->getOperand(1).getImm())) {
+ AddrDisc = AArch64::NoRegister;
+ IntDisc = DiscMI->getOperand(1).getImm();
+ }
+ break;
+ }
+ }
+
+ // For uniformity, always use NoRegister, as XZR is not necessarily contained
+ // in the requested register class.
+ if (AddrDisc == AArch64::XZR)
+ AddrDisc = AArch64::NoRegister;
+
+ // Make sure AddrDisc operand respects the register class imposed by MI.
+ if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
+ Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);
+ BuildMI(*BB, MI, DL, TII->get(AArch64::COPY), TmpReg).addReg(AddrDisc);
+ AddrDisc = TmpReg;
+ }
+
+ AddrDiscOp.setReg(AddrDisc);
+ IntDiscOp.setImm(IntDisc);
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
@@ -3196,6 +3276,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
case AArch64::MOVT_TIZ_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
+
+ case AArch64::PAC:
+ fixupPtrauthDiscriminator(MI, BB, MI.getOperand(3), MI.getOperand(4),
+ &AArch64::GPR64noipRegClass);
+ return BB;
}
}
@@ -3451,7 +3536,7 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
}
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
- return DAG.getNode(Opcode, DL, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
+ return DAG.getNode(Opcode, DL, {FlagsVT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -3465,7 +3550,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
- return DAG.getNode(AArch64ISD::FCMP, DL, MVT::i32, LHS, RHS);
+ return DAG.getNode(AArch64ISD::FCMP, DL, FlagsVT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
@@ -3490,7 +3575,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode =
- DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, MVT_CC),
+ DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, FlagsVT),
LHS.getOperand(0), LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
@@ -3501,7 +3586,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
- return DAG.getNode(Opcode, DL, DAG.getVTList(VT, MVT_CC), LHS, RHS)
+ return DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS)
.getValue(1);
}
@@ -3597,7 +3682,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
- return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
+ return DAG.getNode(Opcode, DL, FlagsVT, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
@@ -4036,7 +4121,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
+ SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
@@ -4059,12 +4144,12 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ SDVTList VTs = DAG.getVTList(MVT::i64, FlagsVT);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
@@ -4075,7 +4160,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
} // switch (...)
if (Opc) {
- SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), FlagsVT);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
@@ -4177,7 +4262,7 @@ static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Op0, Op1);
return Cmp.getValue(1);
}
@@ -4220,16 +4305,15 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
SDLoc DL(Op);
- SDVTList VTs = DAG.getVTList(VT0, VT1);
- SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
+ SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, FlagsVT), OpLHS,
OpRHS, OpCarryIn);
SDValue OutFlag =
IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
: carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
- return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
+ return DAG.getMergeValues({Sum, OutFlag}, DL);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
@@ -4254,8 +4338,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
Overflow =
DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Value, Overflow);
+ return DAG.getMergeValues({Value, Overflow}, DL);
}
// Prefetch operands are:
@@ -6813,7 +6896,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
- {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
+ DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
@@ -7037,9 +7121,8 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
@@ -11108,7 +11191,7 @@ SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op,
SDValue Carry = Op.getOperand(2);
// SBCS uses a carry not a borrow so the carry flag should be inverted first.
SDValue InvCarry = valueToCarryFlag(Carry, DAG, true);
- SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, MVT::Glue),
+ SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, FlagsVT),
LHS, RHS, InvCarry);
EVT OpVT = Op.getValueType();
@@ -12441,10 +12524,10 @@ SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
// Get NZCV register. Only update chain when copyfrom is glued.
if (Glue.getNode()) {
- Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue);
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, FlagsVT, Glue);
Chain = Glue.getValue(1);
} else
- Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32);
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, FlagsVT);
// Extract CC code.
SDValue CC = getSETCC(Cond, Glue, DL, DAG);
@@ -17343,12 +17426,17 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
-bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
+ Value *LaneMask,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
+ auto *SI = dyn_cast<StoreInst>(Store);
+ if (!SI)
+ return false;
+ assert(!LaneMask && "Unexpected mask on store");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
@@ -18015,11 +18103,14 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
unsigned ShlAmt = C2->getZExtValue();
if (auto ShouldADD = *N->user_begin();
ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
- if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
- unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
- if ((1ULL << ShlAmt) == ByteVT &&
- isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
- return false;
+ if (auto Load = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
+ EVT MemVT = Load->getMemoryVT();
+
+ if (Load->getValueType(0).isScalableVector())
+ return (8ULL << ShlAmt) != MemVT.getScalarSizeInBits();
+
+ if (isIndexedLoadLegal(ISD::PRE_INC, MemVT))
+ return (8ULL << ShlAmt) != MemVT.getFixedSizeInBits();
}
}
}
@@ -18588,7 +18679,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
Created.push_back(And.getNode());
} else {
SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ SDVTList VTs = DAG.getVTList(VT, FlagsVT);
SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
@@ -19477,10 +19568,10 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
// can select to CCMN to avoid the extra mov
SDValue AbsOp1 =
DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0));
- CCmp = DAG.getNode(AArch64ISD::CCMN, DL, MVT_CC, Cmp1.getOperand(0), AbsOp1,
- NZCVOp, Condition, Cmp0);
+ CCmp = DAG.getNode(AArch64ISD::CCMN, DL, FlagsVT, Cmp1.getOperand(0),
+ AbsOp1, NZCVOp, Condition, Cmp0);
} else {
- CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
+ CCmp = DAG.getNode(AArch64ISD::CCMP, DL, FlagsVT, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
}
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
@@ -25129,8 +25220,9 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
if (!TReassocOp && !FReassocOp)
return SDValue();
- SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
- DAG.getVTList(VT, MVT_CC), CmpOpOther, SubsOp);
+ SDValue NewCmp =
+ DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
+ DAG.getVTList(VT, FlagsVT), CmpOpOther, SubsOp);
auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
if (!ReassocOp)
@@ -27156,7 +27248,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
- AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::i32, MVT::Other),
+ AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, FlagsVT, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
@@ -27902,16 +27994,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u)) {
+ EVT HalfVT = MemVT.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDNP, SDLoc(N),
- DAG.getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- MVT::Other}),
+ DAG.getVTList({MVT::v2i64, MVT::v2i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()},
LoadNode->getMemoryVT(), LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), MemVT,
- Result.getValue(0), Result.getValue(1));
+ DAG.getBitcast(HalfVT, Result.getValue(0)),
+ DAG.getBitcast(HalfVT, Result.getValue(1)));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 713793e..95d0e3b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -182,6 +182,13 @@ public:
MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ /// Replace (0, vreg) discriminator components with the operands of blend
+ /// or with (immediate, NoRegister) when possible.
+ void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB,
+ MachineOperand &IntDiscOp,
+ MachineOperand &AddrDiscOp,
+ const TargetRegisterClass *AddrDiscRC) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -215,7 +222,8 @@ public:
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
unsigned Factor) const override;
- bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ bool lowerInterleavedStore(Instruction *Store, Value *Mask,
+ ShuffleVectorInst *SVI,
unsigned Factor) const override;
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bc57537..8685d7a0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -20,7 +20,6 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -36,7 +35,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -533,8 +531,9 @@ bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MBP.LHS = LastInst->getOperand(0);
MBP.RHS = MachineOperand::CreateImm(0);
- MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
- : MachineBranchPredicate::PRED_EQ;
+ MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
+ ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
return false;
}
@@ -7353,9 +7352,6 @@ bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
- case AArch64MachineCombinerPattern::GATHER_LANE_i32:
- case AArch64MachineCombinerPattern::GATHER_LANE_i16:
- case AArch64MachineCombinerPattern::GATHER_LANE_i8:
return true;
} // end switch (Pattern)
return false;
@@ -7396,252 +7392,11 @@ static bool getMiscPatterns(MachineInstr &Root,
return false;
}
-static bool getGatherPattern(MachineInstr &Root,
- SmallVectorImpl<unsigned> &Patterns,
- unsigned LoadLaneOpCode, unsigned NumLanes) {
- const MachineFunction *MF = Root.getMF();
-
- // Early exit if optimizing for size.
- if (MF->getFunction().hasMinSize())
- return false;
-
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-
- // The root of the pattern must load into the last lane of the vector.
- if (Root.getOperand(2).getImm() != NumLanes - 1)
- return false;
-
- // Check that we have load into all lanes except lane 0.
- // For each load we also want to check that:
- // 1. It has a single non-debug use (since we will be replacing the virtual
- // register)
- // 2. That the addressing mode only uses a single offset register.
- auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
- SmallSet<unsigned, 4> RemainingLanes(Range.begin(), Range.end());
- while (!RemainingLanes.empty() && CurrInstr &&
- CurrInstr->getOpcode() == LoadLaneOpCode &&
- MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
- CurrInstr->getNumOperands() == 4) {
- RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
- CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
- }
-
- if (!RemainingLanes.empty())
- return false;
-
- // Match the SUBREG_TO_REG sequence.
- if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
- return false;
-
- // Verify that the subreg to reg loads an integer into the first lane.
- auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
- unsigned SingleLaneSizeInBits = 128 / NumLanes;
- if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
- return false;
-
- // Verify that it also has a single non debug use.
- if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
- return false;
-
- switch (NumLanes) {
- case 4:
- Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
- break;
- case 8:
- Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
- break;
- case 16:
- Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
- break;
- default:
- llvm_unreachable("Got bad number of lanes for gather pattern.");
- }
-
- return true;
-}
-
-/// Search for patterns where we use LD1 instructions to load into
-/// separate lanes of an 128 bit Neon register. We can increase Memory Level
-/// Parallelism by loading into 2 Neon registers instead.
-static bool getLoadPatterns(MachineInstr &Root,
- SmallVectorImpl<unsigned> &Patterns) {
-
- // The pattern searches for loads into single lanes.
- switch (Root.getOpcode()) {
- case AArch64::LD1i32:
- return getGatherPattern(Root, Patterns, Root.getOpcode(), 4);
- case AArch64::LD1i16:
- return getGatherPattern(Root, Patterns, Root.getOpcode(), 8);
- case AArch64::LD1i8:
- return getGatherPattern(Root, Patterns, Root.getOpcode(), 16);
- default:
- return false;
- }
-}
-
-static void
-generateGatherPattern(MachineInstr &Root,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<Register, unsigned> &InstrIdxForVirtReg,
- unsigned Pattern, unsigned NumLanes) {
-
- MachineFunction &MF = *Root.getParent()->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-
- // Gather the initial load instructions to build the pattern
- SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
- MachineInstr *CurrInstr = &Root;
- for (unsigned i = 0; i < NumLanes - 1; ++i) {
- LoadToLaneInstrs.push_back(CurrInstr);
- CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
- }
-
- // Sort the load instructions according to the lane.
- llvm::sort(LoadToLaneInstrs,
- [](const MachineInstr *A, const MachineInstr *B) {
- return A->getOperand(2).getImm() > B->getOperand(2).getImm();
- });
-
- MachineInstr *SubregToReg = CurrInstr;
- LoadToLaneInstrs.push_back(
- MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
- auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
-
- const TargetRegisterClass *FPR128RegClass =
- MRI.getRegClass(Root.getOperand(0).getReg());
-
- auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
- Register SrcRegister, unsigned Lane,
- Register OffsetRegister) {
- auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
- MachineInstrBuilder LoadIndexIntoRegister =
- BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
- NewRegister)
- .addReg(SrcRegister)
- .addImm(Lane)
- .addReg(OffsetRegister, getKillRegState(true));
- InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
- InsInstrs.push_back(LoadIndexIntoRegister);
- return NewRegister;
- };
-
- // Helper to create load instruction based on opcode
- auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
- Register OffsetReg) -> MachineInstrBuilder {
- unsigned Opcode;
- switch (NumLanes) {
- case 4:
- Opcode = AArch64::LDRSui;
- break;
- case 8:
- Opcode = AArch64::LDRHui;
- break;
- case 16:
- Opcode = AArch64::LDRBui;
- break;
- default:
- llvm_unreachable(
- "Got unsupported number of lanes in machine-combiner gather pattern");
- }
- // Immediate offset load
- return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
- .addReg(OffsetReg)
- .addImm(0); // immediate offset
- };
-
- // Load the remaining lanes into register 0.
- auto LanesToLoadToReg0 =
- llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
- LoadToLaneInstrsAscending.begin() + NumLanes / 2);
- auto PrevReg = SubregToReg->getOperand(0).getReg();
- for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
- PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
- DelInstrs.push_back(LoadInstr);
- }
- auto LastLoadReg0 = PrevReg;
-
- // First load into register 1. Perform a LDRSui to zero out the upper lanes in
- // a single instruction.
- auto Lane0Load = *LoadToLaneInstrsAscending.begin();
- auto OriginalSplitLoad =
- *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
- auto DestRegForMiddleIndex = MRI.createVirtualRegister(
- MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
-
- MachineInstrBuilder MiddleIndexLoadInstr =
- CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
- OriginalSplitLoad->getOperand(3).getReg());
-
- InstrIdxForVirtReg.insert(
- std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
- InsInstrs.push_back(MiddleIndexLoadInstr);
- DelInstrs.push_back(OriginalSplitLoad);
-
- // Subreg To Reg instruction for register 1.
- auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
- unsigned SubregType;
- switch (NumLanes) {
- case 4:
- SubregType = AArch64::ssub;
- break;
- case 8:
- SubregType = AArch64::hsub;
- break;
- case 16:
- SubregType = AArch64::bsub;
- break;
- default:
- llvm_unreachable(
- "Got invalid NumLanes for machine-combiner gather pattern");
- }
-
- auto SubRegToRegInstr =
- BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
- DestRegForSubregToReg)
- .addImm(0)
- .addReg(DestRegForMiddleIndex, getKillRegState(true))
- .addImm(SubregType);
- InstrIdxForVirtReg.insert(
- std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
- InsInstrs.push_back(SubRegToRegInstr);
-
- // Load remaining lanes into register 1.
- auto LanesToLoadToReg1 =
- llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
- LoadToLaneInstrsAscending.end());
- PrevReg = SubRegToRegInstr->getOperand(0).getReg();
- for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
- PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
- if (Index == NumLanes / 2 - 2) {
- break;
- }
- DelInstrs.push_back(LoadInstr);
- }
- auto LastLoadReg1 = PrevReg;
-
- // Create the final zip instruction to combine the results.
- MachineInstrBuilder ZipInstr =
- BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
- Root.getOperand(0).getReg())
- .addReg(LastLoadReg0)
- .addReg(LastLoadReg1);
- InsInstrs.push_back(ZipInstr);
-}
-
CombinerObjective
AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
switch (Pattern) {
case AArch64MachineCombinerPattern::SUBADD_OP1:
case AArch64MachineCombinerPattern::SUBADD_OP2:
- case AArch64MachineCombinerPattern::GATHER_LANE_i32:
- case AArch64MachineCombinerPattern::GATHER_LANE_i16:
- case AArch64MachineCombinerPattern::GATHER_LANE_i8:
return CombinerObjective::MustReduceDepth;
default:
return TargetInstrInfo::getCombinerObjective(Pattern);
@@ -7671,10 +7426,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMiscPatterns(Root, Patterns))
return true;
- // Load patterns
- if (getLoadPatterns(Root, Patterns))
- return true;
-
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
@@ -8930,21 +8681,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
break;
}
- case AArch64MachineCombinerPattern::GATHER_LANE_i32: {
- generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
- Pattern, 4);
- break;
- }
- case AArch64MachineCombinerPattern::GATHER_LANE_i16: {
- generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
- Pattern, 8);
- break;
- }
- case AArch64MachineCombinerPattern::GATHER_LANE_i8: {
- generateGatherPattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
- Pattern, 16);
- break;
- }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 02734866..7c255da 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -172,10 +172,6 @@ enum AArch64MachineCombinerPattern : unsigned {
FMULv8i16_indexed_OP2,
FNMADD,
-
- GATHER_LANE_i32,
- GATHER_LANE_i16,
- GATHER_LANE_i8
};
class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9f8a257..07cacfa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -430,26 +430,27 @@ def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
+ SDTCisInt<0>,
+ SDTCisVT<1, FlagsVT>]>;
// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<0>,
- SDTCisVT<3, i32>]>;
+ SDTCisVT<3, FlagsVT>]>;
// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>,
- SDTCisVT<1, i32>,
- SDTCisVT<4, i32>]>;
+ SDTCisVT<1, FlagsVT>,
+ SDTCisVT<4, FlagsVT>]>;
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
+ SDTCisVT<2, FlagsVT>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, OtherVT>]>;
@@ -458,22 +459,22 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
- SDTCisVT<4, i32>]>;
+ SDTCisVT<4, FlagsVT>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
+ [SDTCisVT<0, FlagsVT>,
SDTCisInt<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
SDTCisInt<4>,
SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
+ [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
SDTCisInt<4>,
SDTCisVT<5, i32>]>;
-def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<2, 1>]>;
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
@@ -518,10 +519,10 @@ def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
@@ -1124,10 +1125,10 @@ def AArch64probedalloca
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPMayStore]>;
-// MRS, also sets the flags via a glue.
+// MRS, also sets the flags.
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<2, 1, [SDTCisVT<0, i64>,
- SDTCisVT<1, i32>,
+ SDTCisVT<1, FlagsVT>,
SDTCisVT<2, i32>]>,
[SDNPHasChain]>;
@@ -2032,7 +2033,7 @@ let Predicates = [HasPAuth] in {
def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
- defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
+ defm PAC : SignAuth<0b000, 0b010, "pac", null_frag>;
defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
@@ -2152,6 +2153,26 @@ let Predicates = [HasPAuth] in {
let Uses = [];
}
+ // PAC pseudo instruction. In AsmPrinter, it is expanded into an actual PAC*
+ // instruction immediately preceded by the discriminator computation.
+ // This enforces the expected immediate modifier is used for signing, even
+ // if an attacker is able to substitute AddrDisc.
+ def PAC : Pseudo<(outs GPR64:$SignedVal),
+ (ins GPR64:$Val, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc),
+ [], "$SignedVal = $Val">, Sched<[WriteI, ReadI]> {
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ let mayStore = 0;
+ let mayLoad = 0;
+ let Size = 12;
+ let Defs = [X16, X17];
+ let usesCustomInserter = 1;
+ }
+
+ // A standalone pattern is used, so that literal 0 can be passed as $Disc.
+ def : Pat<(int_ptrauth_sign GPR64:$Val, timm:$Key, GPR64noip:$AddrDisc),
+ (PAC GPR64:$Val, $Key, 0, GPR64noip:$AddrDisc)>;
+
// AUT and re-PAC a value, using different keys/data.
// This directly manipulates x16/x17, which are the only registers that
// certain OSs guarantee are safe to use for sensitive operations.
@@ -3934,6 +3955,26 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+// load zero-extended i32, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+
+// load zero-extended i16, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
+// load zero-extended i16, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 61bf87f..1a7609b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -305,7 +305,8 @@ def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">;
def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">;
// Condition code regclass.
-def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+defvar FlagsVT = i32;
+def CCR : RegisterClass<"AArch64", [FlagsVT], 32, (add NZCV)> {
let CopyCost = -1; // Don't allow copying of status registers.
// CCR is not allocatable.
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index bafb8d0..8a5b5ba 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -32,10 +32,29 @@ AArch64SelectionDAGInfo::AArch64SelectionDAGInfo()
void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const {
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+
#ifndef NDEBUG
+ // Some additional checks not yet implemented by verifyTargetNode.
+ constexpr MVT FlagsVT = MVT::i32;
switch (N->getOpcode()) {
- default:
- return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+ case AArch64ISD::SUBS:
+ assert(N->getValueType(1) == FlagsVT);
+ break;
+ case AArch64ISD::ADC:
+ case AArch64ISD::SBC:
+ assert(N->getOperand(2).getValueType() == FlagsVT);
+ break;
+ case AArch64ISD::ADCS:
+ case AArch64ISD::SBCS:
+ assert(N->getValueType(1) == FlagsVT);
+ assert(N->getOperand(2).getValueType() == FlagsVT);
+ break;
+ case AArch64ISD::CSEL:
+ case AArch64ISD::CSINC:
+ case AArch64ISD::BRCOND:
+ assert(N->getOperand(3).getValueType() == FlagsVT);
+ break;
case AArch64ISD::SADDWT:
case AArch64ISD::SADDWB:
case AArch64ISD::UADDWT:
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 75c7dd9..f136a184 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -581,7 +581,6 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
// statement if return_twice functions are called.
bool StandardLifetime =
!SInfo.CallsReturnTwice &&
- SInfo.UnrecognizedLifetimes.empty() &&
memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
ClMaxLifetimes);
if (StandardLifetime) {
@@ -616,10 +615,5 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
memtag::annotateDebugRecords(Info, Tag);
}
- // If we have instrumented at least one alloca, all unrecognized lifetime
- // intrinsics have to go.
- for (auto *I : SInfo.UnrecognizedLifetimes)
- I->eraseFromParent();
-
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index c218831..85de2d5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -36,7 +36,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
// SHF_AARCH64_PURECODE flag set if the "+execute-only" target feature is
// present.
if (TM.getMCSubtargetInfo()->hasFeature(AArch64::FeatureExecuteOnly)) {
- auto *Text = cast<MCSectionELF>(TextSection);
+ auto *Text = static_cast<MCSectionELF *>(TextSection);
Text->setFlags(Text->getFlags() | ELF::SHF_AARCH64_PURECODE);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 90d3d92..40f49da 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -249,7 +249,7 @@ static bool hasPossibleIncompatibleOps(const Function *F) {
return false;
}
-uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const {
+APInt AArch64TTIImpl::getFeatureMask(const Function &F) const {
StringRef AttributeStr =
isMultiversionedFunction(F) ? "fmv-features" : "target-features";
StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index b27eb2e..7f45177 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -89,7 +89,7 @@ public:
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
unsigned DefaultCallPenalty) const override;
- uint64_t getFeatureMask(const Function &F) const override;
+ APInt getFeatureMask(const Function &F) const override;
bool isMultiversionedFunction(const Function &F) const override;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 08f547a..6257e99 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -523,7 +523,8 @@ void AArch64TargetELFStreamer::finish() {
// mark it execute-only if it is empty and there is at least one
// execute-only section in the object.
if (any_of(Asm, [](const MCSection &Sec) {
- return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_AARCH64_PURECODE;
+ return static_cast<const MCSectionELF &>(Sec).getFlags() &
+ ELF::SHF_AARCH64_PURECODE;
})) {
auto *Text =
static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 3d4a14b..1a9bce5 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -9,8 +9,6 @@
#include "AArch64MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 1ac340a..a22a17a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -132,7 +132,8 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section,
// But only if they don't point to a few forbidden sections.
if (!Symbol.isInSection())
return true;
- const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
+ const MCSectionMachO &RefSec =
+ static_cast<MCSectionMachO &>(Symbol.getSection());
if (RefSec.getType() == MachO::S_CSTRING_LITERALS)
return false;