aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h6
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp17
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp18
-rw-r--r--llvm/lib/Target/ARM/ARM.h2
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp61
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARMBranchTargets.cpp135
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp72
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/ARM/CMakeLists.txt1
-rw-r--r--llvm/test/CodeGen/ARM/O3-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir311
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-const-island.mir168
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll22
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll135
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-jump-table.mir120
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-outliner-1.ll101
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-outliner-2.ll82
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll67
-rw-r--r--llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll51
20 files changed, 1345 insertions, 29 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8bc730a..d43dd9f 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1913,6 +1913,12 @@ public:
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
}
+ /// Optional target hook to create the LLVM IR attributes for the outlined
+ /// function \p F from the parent functions of \p Candidates. If overridden, the overriding function must call the default
+ /// implementation, which copies "target-features" from the first candidate's parent and adds nounwind only if every candidate's parent has it.
+ virtual void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const;
+
/// Returns how or if \p MI should be outlined.
virtual outliner::InstrType
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index cfbcceb..7783b5e 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -617,20 +617,11 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Include target features from an arbitrary candidate for the outlined
- // function. This makes sure the outlined function knows what kinds of
- // instructions are going into it. This is fine, since all parent functions
- // must necessarily support the instructions that are in the outlined region.
Candidate &FirstCand = OF.Candidates.front();
- const Function &ParentFn = FirstCand.getMF()->getFunction();
- if (ParentFn.hasFnAttribute("target-features"))
- F->addFnAttr(ParentFn.getFnAttribute("target-features"));
+ const TargetInstrInfo &TII =
+ *FirstCand.getMF()->getSubtarget().getInstrInfo();
- // Set nounwind, so we don't generate eh_frame.
- if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) {
- return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
- }))
- F->addFnAttr(Attribute::NoUnwind);
+ TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -639,8 +630,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index f94b548..5119dac 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1400,3 +1400,21 @@ std::string TargetInstrInfo::createMIROperandComment(
}
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+
+void TargetInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const { // Default hook: derive attributes of outlined function F from the candidates' parent functions.
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ outliner::Candidate &FirstCand = Candidates.front(); // front() is taken unconditionally, so Candidates must not be empty.
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-features"));
+
+ // Set nounwind, so we don't generate eh_frame. This is only done when every candidate's parent function is itself nounwind.
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
+ }))
+ F.addFnAttr(Attribute::NoUnwind);
+}
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 5500783f..1d5e45a 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -44,6 +44,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMBranchTargetsPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
@@ -66,6 +67,7 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
void initializeARMParallelDSPPass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+void initializeARMBranchTargetsPass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a5dc45d..cf1be00 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5737,17 +5737,17 @@ enum MachineOutlinerMBBFlags {
};
struct OutlinerCosts {
- const int CallTailCall;
- const int FrameTailCall;
- const int CallThunk;
- const int FrameThunk;
- const int CallNoLRSave;
- const int FrameNoLRSave;
- const int CallRegSave;
- const int FrameRegSave;
- const int CallDefault;
- const int FrameDefault;
- const int SaveRestoreLROnStack;
+ int CallTailCall;
+ int FrameTailCall;
+ int CallThunk;
+ int FrameThunk;
+ int CallNoLRSave;
+ int FrameNoLRSave;
+ int CallRegSave;
+ int FrameRegSave;
+ int CallDefault;
+ int FrameDefault;
+ int SaveRestoreLROnStack;
OutlinerCosts(const ARMSubtarget &target)
: CallTailCall(target.isThumb() ? 4 : 4),
@@ -5868,6 +5868,24 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
return outliner::OutlinedFunction();
}
+ // Partition the candidates in two sets: one with BTI enabled and one with BTI
+ // disabled. Remove the candidates from the smaller set. We expect the
+ // majority of the candidates to be in consensus with regard to branch target
+ // enforcement with just a few oddballs, but if they are the same number
+ // prefer the non-BTI ones for outlining, since they have less overhead.
+ auto NoBTI =
+ llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
+ const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
+ return AFI.branchTargetEnforcement();
+ });
+ if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
+ std::distance(NoBTI, RepeatedSequenceLocs.end()))
+ RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
+ else
+ RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
@@ -5881,6 +5899,16 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
};
OutlinerCosts Costs(Subtarget);
+ const auto &SomeMFI =
+ *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
+ // Adjust costs to account for the BTI instructions.
+ if (SomeMFI.branchTargetEnforcement()) {
+ Costs.FrameDefault += 4;
+ Costs.FrameNoLRSave += 4;
+ Costs.FrameRegSave += 4;
+ Costs.FrameTailCall += 4;
+ Costs.FrameThunk += 4;
+ }
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = Costs.FrameDefault;
@@ -6078,7 +6106,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
}
return false;
+}
+
+void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const { // ARM override: also propagate "branch-target-enforcement" to the outlined function F.
+ outliner::Candidate &C = Candidates.front(); // front() is taken unconditionally, so Candidates must not be empty.
+ // branch-target-enforcement is guaranteed to be consistent between all
+ // candidates, so we only need to look at one. (getOutliningCandidateInfo drops the smaller of the BTI / non-BTI candidate sets.)
+ const Function &CFn = C.getMF()->getFunction();
+ if (CFn.hasFnAttribute("branch-target-enforcement"))
+ F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
+ ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); // Chain to the inherited implementation, as the hook's contract requires.
}
bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index db93209..e2c2504 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -349,6 +349,8 @@ public:
bool OutlineFromLinkOnceODRs) const override;
outliner::OutlinedFunction getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
unsigned Flags) const override;
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
new file mode 100644
index 0000000..1091c1f
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
@@ -0,0 +1,135 @@
+//===-- ARMBranchTargets.cpp -- Harden code using v8.1-M BTI extension -----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-branch-targets"
+#define ARM_BRANCH_TARGETS_NAME "ARM Branch Targets"
+
+namespace {
+class ARMBranchTargets : public MachineFunctionPass { // Machine function pass that inserts BTI/PACBTI instructions at potential indirect branch targets.
+public:
+ static char ID; // Pass ID handed to the MachineFunctionPass constructor below.
+ ARMBranchTargets() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override; // Declares that the pass preserves the CFG.
+ bool runOnMachineFunction(MachineFunction &MF) override; // Returns true if any instruction was inserted.
+ StringRef getPassName() const override { return ARM_BRANCH_TARGETS_NAME; } // "ARM Branch Targets"
+
+private:
+ void addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, bool IsFirstBB); // Inserts BTI (or PACBTI in the entry block) at the start of MBB.
+};
+} // end anonymous namespace
+
+char ARMBranchTargets::ID = 0;
+
+INITIALIZE_PASS(ARMBranchTargets, "arm-branch-targets", ARM_BRANCH_TARGETS_NAME,
+ false, false)
+
+void ARMBranchTargets::getAnalysisUsage(AnalysisUsage &AU) const { // Reports the pass's analysis requirements/preservations into AU.
+ AU.setPreservesCFG(); // The pass only inserts/replaces instructions inside blocks; it adds no blocks or edges.
+ MachineFunctionPass::getAnalysisUsage(AU); // Let the base class add its own entries.
+}
+
+FunctionPass *llvm::createARMBranchTargetsPass() { // Factory for the ARM Branch Targets pass.
+ return new ARMBranchTargets(); // Returned as a raw pointer; presumably owned by the pass manager it is added to (see addPreEmitPass2) -- confirm.
+}
+
+bool ARMBranchTargets::runOnMachineFunction(MachineFunction &MF) { // Returns true iff at least one BTI/PACBTI was inserted into MF.
+ if (!MF.getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ return false; // Branch target enforcement is off for this function: leave it untouched.
+
+ LLVM_DEBUG(dbgs() << "********** ARM Branch Targets **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ const ARMInstrInfo &TII =
+ *static_cast<const ARMInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // LLVM does not consider basic blocks which are the targets of jump tables
+ // to be address-taken (the address can't escape anywhere else), but they are
+ // used for indirect branches, so need BTI instructions.
+ SmallPtrSet<const MachineBasicBlock *, 8> JumpTableTargets;
+ if (const MachineJumpTableInfo *JTI = MF.getJumpTableInfo())
+ for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
+ for (const MachineBasicBlock *MBB : JTE.MBBs)
+ JumpTableTargets.insert(MBB);
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool NeedBTI = false;
+ bool IsFirstBB = &MBB == &MF.front(); // Entry block: addBTI may fold a leading PAC into PACBTI there.
+
+ // Every function can potentially be called indirectly (even if it has
+ // static linkage, due to linker-generated veneers).
+ if (IsFirstBB)
+ NeedBTI = true;
+
+ // If the block itself is address-taken, is an exception landing pad, or is a jump table target, it
+ // could be indirectly branched to.
+ if (MBB.hasAddressTaken() || MBB.isEHPad() || JumpTableTargets.count(&MBB))
+ NeedBTI = true;
+
+ if (NeedBTI) {
+ addBTI(TII, MBB, IsFirstBB);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// Insert a BTI/PACBTI instruction into a given basic block \c MBB. If
+/// \c IsFirstBB is true (meaning that this is the first BB in a function) try
+/// to find a PAC instruction and replace it with PACBTI. Otherwise just insert
+/// a BTI instruction.
+/// The point of insertion is in the beginning of the BB, immediately after meta
+/// instructions (such as labels in exception handling landing pads).
+void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB,
+ bool IsFirstBB) {
+ // Which instruction to insert: BTI or PACBTI
+ unsigned OpCode = ARM::t2BTI;
+
+ // Skip meta instructions, including EH labels
+ auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+
+ // If this is the first BB in a function, check if it starts with a PAC
+ // instruction and in that case remove the PAC instruction.
+ if (IsFirstBB) {
+ if (MBBI != MBB.instr_end() && MBBI->getOpcode() == ARM::t2PAC) {
+ LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName()
+ << "' to replace with PACBTI\n");
+ OpCode = ARM::t2PACBTI;
+ auto NextMBBI = std::next(MBBI); // Captured before the erase so the insertion point remains usable.
+ MBBI->eraseFromParent();
+ MBBI = NextMBBI;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inserting a '"
+ << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI")
+ << "' instr into BB '" << MBB.getName() << "'\n");
+ // Finally, insert a new instruction (either BTI or PACBTI)
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode));
+}
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index fe182ac..c2ca470 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -184,6 +184,9 @@ namespace {
/// base address.
DenseMap<int, int> JumpTableUserIndices;
+ // Maps a MachineBasicBlock to the number of jump table entries that reference it.
+ DenseMap<const MachineBasicBlock *, int> BlockJumpTableRefCount;
+
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
@@ -274,7 +277,10 @@ namespace {
unsigned &DeadSize, bool &CanDeleteLEA,
bool &BaseRegKill);
bool optimizeThumb2JumpTables();
- MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
+ void fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
+ MachineBasicBlock &NewBB);
+ MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI,
+ MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
unsigned getUserOffset(CPUser&) const;
@@ -518,6 +524,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
CPEntries.clear();
JumpTableEntryIndices.clear();
JumpTableUserIndices.clear();
+ BlockJumpTableRefCount.clear();
ImmBranches.clear();
PushPopMIs.clear();
T2JumpTables.clear();
@@ -720,6 +727,14 @@ Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
return MCP->getConstants()[CPI].getAlign();
}
+// Exception landing pads, blocks that have their address taken, and function
+// entry blocks will always be (potential) indirect jump targets, regardless of
+// whether or not they are referenced by jump tables.
+static bool isAlwaysIndirectTarget(const MachineBasicBlock &MBB) {
+ return MBB.isEHPad() || MBB.hasAddressTaken() ||
+ &MBB == &MBB.getParent()->front(); // MBB is the first block of its function.
+}
+
/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
@@ -730,6 +745,20 @@ void ARMConstantIslands::scanFunctionJumpTables() {
(I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
T2JumpTables.push_back(&I);
}
+
+ if (!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ return;
+
+ if (const MachineJumpTableInfo *JTI = MF->getJumpTableInfo())
+ for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
+ for (const MachineBasicBlock *MBB : JTE.MBBs) {
+ if (isAlwaysIndirectTarget(*MBB))
+ // Set the reference count essentially to infinity, it will never
+ // reach zero and the BTI Instruction will never be removed.
+ BlockJumpTableRefCount[MBB] = std::numeric_limits<int>::max();
+ else
+ ++BlockJumpTableRefCount[MBB];
+ }
}
/// initializeFunctionInfo - Do the initial scan of the function, building up
@@ -2411,7 +2440,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
// The destination precedes the switch. Try to move the block forward
// so we have a positive offset.
MachineBasicBlock *NewBB =
- adjustJTTargetBlockForward(MBB, MI->getParent());
+ adjustJTTargetBlockForward(JTI, MBB, MI->getParent());
if (NewBB)
MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB);
MadeChange = true;
@@ -2422,8 +2451,40 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
return MadeChange;
}
-MachineBasicBlock *ARMConstantIslands::
-adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
+/// Keep BTI placement consistent when \p NewBB stands in for \p OldBB as a
+/// target of jump table \p JTI: insert a BTI at the start of \p NewBB, move
+/// the jump table reference counts from \p OldBB to \p NewBB, and drop the
+/// BTI from \p OldBB once no jump table entry refers to it any more.
+/// Blocks that are always indirect targets carry an effectively infinite
+/// reference count (see scanFunctionJumpTables), so their BTI is never removed.
+void ARMConstantIslands::fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
+ MachineBasicBlock &NewBB) {
+ assert(isThumb2 && "BTI in Thumb1?");
+
+ // Insert a BTI instruction into NewBB
+ BuildMI(NewBB, NewBB.begin(), DebugLoc(), TII->get(ARM::t2BTI));
+
+ // Update jump table reference counts: every entry of this jump table that
+ // currently points at OldBB is accounted to NewBB instead.
+ const MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JTE = MJTI.getJumpTables()[JTI];
+ for (const MachineBasicBlock *MBB : JTE.MBBs) {
+ if (MBB != &OldBB)
+ continue;
+ --BlockJumpTableRefCount[MBB];
+ ++BlockJumpTableRefCount[&NewBB];
+ }
+
+ // If the old basic block reference count dropped to zero, remove
+ // the BTI instruction at its beginning.
+ if (BlockJumpTableRefCount[&OldBB] > 0)
+ return;
+
+ // Skip meta instructions
+ auto BTIPos = llvm::find_if_not(OldBB.instrs(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+ // Guard against a block holding only meta instructions, where BTIPos is the
+ // end iterator and must not be dereferenced.
+ const bool StartsWithBTI =
+ BTIPos != OldBB.instr_end() && BTIPos->getOpcode() == ARM::t2BTI;
+ assert(StartsWithBTI &&
+ "BasicBlock is mentioned in a jump table but does not start with BTI");
+ if (StartsWithBTI)
+ BTIPos->eraseFromParent();
+}
+
+MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
+ unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -2481,6 +2542,9 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
NewBB->addSuccessor(BB);
JTBB->replaceSuccessor(BB, NewBB);
+ if (MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ fixupBTI(JTI, *BB, *NewBB);
+
++NumJTInserted;
return NewBB;
}
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 833c7ef..0b314ac 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -92,6 +92,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
initializeARMParallelDSPPass(Registry);
+ initializeARMBranchTargetsPass(Registry);
initializeARMConstantIslandsPass(Registry);
initializeARMExecutionDomainFixPass(Registry);
initializeARMExpandPseudoPass(Registry);
@@ -571,6 +572,7 @@ void ARMPassConfig::addPreEmitPass() {
}
void ARMPassConfig::addPreEmitPass2() {
+ addPass(createARMBranchTargetsPass());
addPass(createARMConstantIslandPass());
addPass(createARMLowOverheadLoopsPass());
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index a3ec9e9..ed265d9 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_target(ARMCodeGen
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMBasicBlockInfo.cpp
+ ARMBranchTargets.cpp
ARMCallingConv.cpp
ARMCallLowering.cpp
ARMConstantIslandPass.cpp
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index d0ef182..b05429f 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -179,6 +179,7 @@
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Outliner
; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: ARM Branch Targets
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: ARM constant island placement and branch shortening pass
; CHECK-NEXT: MachineDominator Tree Construction
diff --git a/llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir b/llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir
new file mode 100644
index 0000000..e565d34
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir
@@ -0,0 +1,311 @@
+# RUN: llc -verify-machineinstrs -run-pass arm-cp-islands %s -o - | FileCheck %s
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-arm-unknown-eabi"
+
+ ; Tests adjustments to jump tables, made by the ARM Constant Islands pass
+ ; int g(int), h(int);
+ ; void g0(int), g1(int), g2(int);
+ ; void h0(int), h1(int), h2(int);
+ ;
+ ; void f(int x) {
+ ; for (;;) {
+ ; up:
+ ; x = g(x);
+ ; switch (x) {
+ ; case 0:
+ ; g0(x);
+ ; break;
+ ; case 1:
+ ; g1(x);
+ ; break;
+ ; case 2:
+ ; g2(x);
+ ; break;
+ ; case 3:
+ ; break;
+ ; case 4:
+ ; for (;;) {
+ ; x = h(x);
+ ; switch (x) {
+ ; case 0:
+ ; h0(x);
+ ; break;
+ ; case 1:
+ ; h1(x);
+ ; break;
+ ; case 2:
+ ; h2(x);
+ ; break;
+ ; case 3:
+ ; goto up;
+ ; case 4:
+ ; return;
+ ; }
+ ; }
+ ; }
+ ; }
+ ; }
+
+ define hidden void @f(i32 %x) local_unnamed_addr #0 {
+ entry:
+ br label %up
+
+ up: ; preds = %up, %sw.bb, %sw.bb1, %sw.bb2, %up.backedge.loopexit, %entry
+ %x.addr.1 = phi i32 [ %x, %entry ], [ %call, %up ], [ %call, %sw.bb2 ], [ %call, %sw.bb1 ], [ %call, %sw.bb ], [ %call5, %up.backedge.loopexit ]
+ %call = tail call i32 @g(i32 %x.addr.1)
+ switch i32 %call, label %up [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 4, label %for.cond4.preheader
+ ]
+
+ for.cond4.preheader: ; preds = %up
+ br label %for.cond4
+
+ up.backedge.loopexit: ; preds = %for.cond4
+ br label %up
+
+ sw.bb: ; preds = %up
+ tail call void @g0(i32 0)
+ br label %up
+
+ sw.bb1: ; preds = %up
+ tail call void @g1(i32 1)
+ br label %up
+
+ sw.bb2: ; preds = %up
+ tail call void @g2(i32 2)
+ br label %up
+
+ for.cond4: ; preds = %for.cond4, %sw.bb6, %sw.bb7, %sw.bb8, %for.cond4.preheader
+ %x.addr.2 = phi i32 [ %call, %for.cond4.preheader ], [ %call5, %sw.bb8 ], [ %call5, %sw.bb7 ], [ %call5, %sw.bb6 ], [ %call5, %for.cond4 ]
+ %call5 = tail call i32 @h(i32 %x.addr.2)
+ switch i32 %call5, label %for.cond4 [
+ i32 0, label %sw.bb6
+ i32 1, label %sw.bb7
+ i32 2, label %sw.bb8
+ i32 3, label %up.backedge.loopexit
+ i32 4, label %sw.bb10
+ ]
+
+ sw.bb6: ; preds = %for.cond4
+ tail call void @h0(i32 0)
+ br label %for.cond4
+
+ sw.bb7: ; preds = %for.cond4
+ tail call void @h1(i32 1)
+ br label %for.cond4
+
+ sw.bb8: ; preds = %for.cond4
+ tail call void @h2(i32 2)
+ br label %for.cond4
+
+ sw.bb10: ; preds = %for.cond4
+ ret void
+ }
+
+ declare dso_local i32 @g(i32)
+
+ declare dso_local void @g0(i32)
+
+ declare dso_local void @g1(i32)
+
+ declare dso_local void @g2(i32)
+
+ declare dso_local i32 @h(i32)
+
+ declare dso_local void @h0(i32)
+
+ declare dso_local void @h1(i32)
+
+ declare dso_local void @h2(i32)
+
+ attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="none" "no-jump-tables"="false" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+ras,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" }
+
+ !llvm.module.flags = !{!0}
+
+ !0 = !{i32 1, !"branch-target-enforcement", i32 1}
+
+...
+---
+name: f
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+ - { reg: '$r0', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.3', '%bb.4', '%bb.5', '%bb.1', '%bb.7' ]
+ - id: 1
+ blocks: [ '%bb.6', '%bb.9', '%bb.10', '%bb.1', '%bb.11' ]
+
+# %bb.4 and %bb.10 redirect to %bb.1, the rest are just renumbered
+# CHECK-LABEL: jumpTable:
+# CHECK-NEXT: kind: inline
+# CHECK-NEXT: entries:
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT: blocks: [ '%bb.6', '%bb.14', '%bb.5', '%bb.4', '%bb.7' ]
+# CHECK-NEXT: - id: 1
+# CHECK-NEXT: blocks: [ '%bb.11', '%bb.12', '%bb.13', '%bb.10', '%bb.15' ]
+
+# %bb.1 loses the BTI
+# CHECK-LABEL: bb.1.up (align 4):
+# CHECK-NOT: t2BTI
+# CHECK-LABEL: bb.2.up:
+
+# CHECK-LABEL: bb.4.up:
+# CHECK: t2BTI
+# CHECK: tB %bb.1
+
+# CHECK-LABEL: bb.10.for.cond4:
+# CHECK: t2BTI
+# CHECK: tB %bb.1
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $r0, $r4, $lr
+
+ t2BTI
+ frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r4, -8
+ $r4 = tMOVr killed $r0, 14 /* CC::al */, $noreg
+ t2B %bb.1, 14 /* CC::al */, $noreg
+
+ bb.5.sw.bb2:
+ successors: %bb.1(0x80000000)
+ liveins: $r4
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @g2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp
+
+ bb.1.up (align 4):
+ successors: %bb.1(0x20000000), %bb.2(0x60000000)
+ liveins: $r4
+
+ t2BTI
+ $r0 = tMOVr killed $r4, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @g, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0
+ $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+ tCMPi8 killed $r0, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.1, 8 /* CC::hi */, killed $cpsr
+ bb.2.up:
+ successors: %bb.3(0x15555555), %bb.4(0x15555555), %bb.5(0x15555555), %bb.1(0x2aaaaaab), %bb.7(0x15555555)
+ liveins: $r4
+
+ renamable $r0 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg
+ renamable $r0 = t2ADDrs killed renamable $r0, renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg
+ t2BR_JT killed renamable $r0, renamable $r4, %jump-table.0
+
+ bb.3.sw.bb:
+ successors: %bb.1(0x80000000)
+ liveins: $r4
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @g0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp
+ t2B %bb.1, 14 /* CC::al */, $noreg
+
+ bb.6.sw.bb6:
+ successors: %bb.7(0x80000000)
+ liveins: $r4
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @h0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp
+
+ bb.7.for.cond4 (align 4):
+ successors: %bb.7(0x3efbefc0), %bb.8(0x41041040)
+ liveins: $r4
+
+ t2BTI
+ $r0 = tMOVr killed $r4, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @h, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0
+ $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+ tCMPi8 killed $r0, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.7, 8 /* CC::hi */, killed $cpsr
+
+ bb.8.for.cond4:
+ successors: %bb.6(0x29555555), %bb.9(0x29555555), %bb.10(0x29555555), %bb.1(0x02000000), %bb.11(0x02000000)
+ liveins: $r4
+
+ renamable $r0 = t2LEApcrelJT %jump-table.1, 14 /* CC::al */, $noreg
+ renamable $r0 = t2ADDrs killed renamable $r0, renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg
+ t2BR_JT killed renamable $r0, renamable $r4, %jump-table.1
+
+ bb.9.sw.bb7:
+ successors: %bb.7(0x80000000)
+ liveins: $r4
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @h1, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp
+ t2B %bb.7, 14 /* CC::al */, $noreg
+
+ bb.10.sw.bb8:
+ successors: %bb.7(0x80000000)
+ liveins: $r4
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @h2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp
+ t2B %bb.7, 14 /* CC::al */, $noreg
+
+ bb.4.sw.bb1:
+ successors: %bb.1(0x80000000)
+ liveins: $r4
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @g1, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp
+ t2B %bb.1, 14 /* CC::al */, $noreg
+
+ bb.11.sw.bb10:
+ t2BTI
+ frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/bti-const-island.mir b/llvm/test/CodeGen/Thumb2/bti-const-island.mir
new file mode 100644
index 0000000..52ee5c8
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-const-island.mir
@@ -0,0 +1,168 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv7m-arm-none-eabi -run-pass=arm-cp-islands %s -o - | FileCheck %s
+
+# This test checks that the ARM Constant Island pass correctly handles BTI
+# instructions when adding new BBs to jump tables.
+#
+# Specifically the pass will replace bb.1.bb42.i in the jump table with a new
+# BB which will contain an unconditional branch to bb.1.bb42.i.
+# We expect that a BTI instruction will be added to the new BB and removed from
+# bb.1.bb42.i.
+
+--- |
+ declare noalias i8* @calloc(i32, i32)
+
+ define internal i32 @test(i32 %argc, i8** nocapture %argv) {
+ entry:
+ br label %bb42.i
+
+ bb5.i:
+ %0 = or i32 %argc, 32
+ br label %bb42.i
+
+ bb35.i:
+ %1 = call noalias i8* @calloc(i32 20, i32 1)
+ unreachable
+
+ bb37.i:
+ %2 = call noalias i8* @calloc(i32 14, i32 1)
+ unreachable
+
+ bb39.i:
+ %3 = call noalias i8* @calloc(i32 17, i32 1)
+ unreachable
+
+ bb42.i:
+ switch i32 %argc, label %bb39.i [
+ i32 70, label %bb35.i
+ i32 77, label %bb37.i
+ i32 100, label %bb5.i
+ i32 101, label %bb42.i
+ i32 116, label %bb42.i
+ ]
+ }
+
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 1, !"branch-target-enforcement", i32 1}
+
+...
+---
+name: test
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$r0' }
+frameInfo:
+ stackSize: 8
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr' }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r7' }
+machineFunctionInfo: {}
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.3', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.5', '%bb.4', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.1', '%bb.1', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5',
+ '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.1' ]
+body: |
+ ; CHECK-LABEL: name: test
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $r0, $r7, $lr
+ ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+ ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
+ ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 70, 14 /* CC::al */, $noreg
+ ; CHECK: bb.1.bb42.i (align 4):
+ ; CHECK: successors: %bb.6(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $r0
+ ; CHECK: tCMPi8 renamable $r0, 46, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: tBcc %bb.6, 8 /* CC::hi */, killed $cpsr
+ ; CHECK: bb.2.bb42.i:
+ ; CHECK: successors: %bb.5(0x20000000), %bb.6(0x20000000), %bb.7(0x20000000), %bb.4(0x20000000)
+ ; CHECK: liveins: $r0
+ ; CHECK: t2TBB_JT $pc, $r0, %jump-table.0, 0
+ ; CHECK: bb.3:
+ ; CHECK: successors:
+ ; CHECK: JUMPTABLE_TBB 0, %jump-table.0, 188
+ ; CHECK: bb.4.bb42.i:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $r0
+ ; CHECK: t2BTI
+ ; CHECK: tB %bb.1, 14 /* CC::al */, $noreg
+ ; CHECK: bb.5.bb35.i:
+ ; CHECK: successors:
+ ; CHECK: t2BTI
+ ; CHECK: $r0, dead $cpsr = tMOVi8 20, 14 /* CC::al */, $noreg
+ ; CHECK: $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ ; CHECK: tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0
+ ; CHECK: bb.6.bb39.i:
+ ; CHECK: successors:
+ ; CHECK: t2BTI
+ ; CHECK: $r0, dead $cpsr = tMOVi8 17, 14 /* CC::al */, $noreg
+ ; CHECK: $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ ; CHECK: tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0
+ ; CHECK: bb.7.bb37.i:
+ ; CHECK: t2BTI
+ ; CHECK: $r0, dead $cpsr = tMOVi8 14, 14 /* CC::al */, $noreg
+ ; CHECK: $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ ; CHECK: tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0
+ bb.0.entry:
+ liveins: $r0, $r7, $lr
+
+ frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 70, 14 /* CC::al */, $noreg
+
+ bb.1.bb42.i (align 4):
+ successors: %bb.5, %bb.2
+ liveins: $r0
+
+ t2BTI
+ tCMPi8 renamable $r0, 46, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.5, 8 /* CC::hi */, killed $cpsr
+
+ bb.2.bb42.i:
+ successors: %bb.3, %bb.5, %bb.4, %bb.1
+ liveins: $r0
+
+ renamable $r1 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg
+ renamable $r1 = t2ADDrs killed renamable $r1, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
+ t2BR_JT killed renamable $r1, renamable $r0, %jump-table.0
+
+ bb.3.bb35.i:
+ successors:
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 20, 14 /* CC::al */, $noreg
+ $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0
+
+ bb.5.bb39.i:
+ successors:
+
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 17, 14 /* CC::al */, $noreg
+ $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0
+
+ bb.4.bb37.i:
+ t2BTI
+ $r0, dead $cpsr = tMOVi8 14, 14 /* CC::al */, $noreg
+ $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll b/llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll
new file mode 100644
index 0000000..ce673fe
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi | FileCheck %s
+
+define hidden i32 @linkage_external() local_unnamed_addr {
+; CHECK-LABEL: linkage_external:
+; CHECK: bti
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: bx lr
+entry:
+ ret i32 1
+}
+
+define internal i32 @linkage_internal() unnamed_addr {
+; CHECK-LABEL: linkage_internal:
+; CHECK: bti
+; CHECK: movs r0, #2
+; CHECK-NEXT: bx lr
+entry:
+ ret i32 2
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
new file mode 100644
index 0000000..f56e3b6
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi | FileCheck %s
+
+define internal i32 @table_switch(i32 %x) {
+; CHECK-LABEL: table_switch:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bti
+; CHECK-NEXT: subs r1, r0, #1
+; CHECK-NEXT: cmp r1, #3
+; CHECK-NEXT: bhi .LBB0_4
+; CHECK-NEXT: @ %bb.1: @ %entry
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: tbb [pc, r1]
+; CHECK-NEXT: @ %bb.2:
+; CHECK-NEXT: .LJTI0_0:
+; CHECK-NEXT: .byte (.LBB0_5-(.LCPI0_0+4))/2
+; CHECK-NEXT: .byte (.LBB0_3-(.LCPI0_0+4))/2
+; CHECK-NEXT: .byte (.LBB0_6-(.LCPI0_0+4))/2
+; CHECK-NEXT: .byte (.LBB0_7-(.LCPI0_0+4))/2
+; CHECK-NEXT: .p2align 1
+; CHECK-NEXT: .LBB0_3: @ %bb2
+; CHECK-NEXT: bti
+; CHECK-NEXT: movs r0, #2
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .LBB0_4: @ %sw.epilog
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: .LBB0_5: @ %return
+; CHECK-NEXT: bti
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .LBB0_6: @ %bb3
+; CHECK-NEXT: bti
+; CHECK-NEXT: movs r0, #3
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .LBB0_7: @ %bb4
+; CHECK-NEXT: bti
+; CHECK-NEXT: movs r0, #4
+; CHECK-NEXT: bx lr
+entry:
+ switch i32 %x, label %sw.epilog [
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ ]
+
+bb1:
+ br label %return
+bb2:
+ br label %return
+bb3:
+ br label %return
+bb4:
+ br label %return
+sw.epilog:
+ br label %return
+
+return:
+ %ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ]
+ ret i32 %ret
+}
+
+@computed_goto_cases = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@computed_goto, %return), i8* blockaddress(@computed_goto, %case_1)], align 4
+
+define internal i32 @computed_goto(i32 %x) {
+; CHECK-LABEL: computed_goto:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bti
+; CHECK-NEXT: movw r1, :lower16:.Lcomputed_goto_cases
+; CHECK-NEXT: movt r1, :upper16:.Lcomputed_goto_cases
+; CHECK-NEXT: ldr.w r0, [r1, r0, lsl #2]
+; CHECK-NEXT: mov pc, r0
+; CHECK-NEXT: .Ltmp3: @ Block address taken
+; CHECK-NEXT: .LBB1_1: @ %return
+; CHECK-NEXT: bti
+; CHECK-NEXT: movs r0, #2
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .Ltmp4: @ Block address taken
+; CHECK-NEXT: .LBB1_2: @ %case_1
+; CHECK-NEXT: bti
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @computed_goto_cases, i32 0, i32 %x
+ %0 = load i8*, i8** %arrayidx, align 4
+ indirectbr i8* %0, [label %return, label %case_1]
+
+case_1:
+ br label %return
+
+return:
+ %ret = phi i32 [ 1, %case_1 ], [ 2, %entry ]
+ ret i32 %ret
+}
+
+declare void @may_throw()
+declare void @consume_exception(i8*)
+declare i32 @__gxx_personality_v0(...)
+
+define internal i32 @exception_handling(i32 %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: exception_handling:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: bti
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: bl may_throw
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: bti
+; CHECK-NEXT: bl consume_exception
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ invoke void @may_throw()
+ to label %return unwind label %lpad
+
+lpad:
+ %1 = landingpad { i8*, i32 }
+ catch i8* null
+ %2 = extractvalue { i8*, i32 } %1, 0
+ call void @consume_exception(i8* %2)
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 1, %lpad ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/Thumb2/bti-jump-table.mir b/llvm/test/CodeGen/Thumb2/bti-jump-table.mir
new file mode 100644
index 0000000..22f255b
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-jump-table.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc < %s -x mir -mtriple=thumbv7m-arm-none-eabi -run-pass=arm-branch-targets | FileCheck %s
+--- |
+ define internal i32 @table_switch(i32 %x) {
+ entry:
+ switch i32 %x, label %sw.epilog [
+ i32 1, label %return
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ ]
+
+ bb2:
+ br label %return
+
+ bb3:
+ br label %return
+
+ bb4:
+ br label %return
+
+ sw.epilog:
+ br label %return
+
+ return:
+ %ret = phi i32 [ 0, %sw.epilog ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 1, %entry ]
+ ret i32 %ret
+ }
+
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 1, !"branch-target-enforcement", i32 1}
+
+...
+---
+name: table_switch
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$r0' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.6', '%bb.2', '%bb.3', '%bb.4' ]
+body: |
+ ; CHECK-LABEL: name: table_switch
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $r0
+ ; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 1, 14 /* CC::al */, $noreg
+ ; CHECK: tCMPi8 renamable $r1, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: t2Bcc %bb.3, 8 /* CC::hi */, killed $cpsr
+ ; CHECK: bb.1.entry:
+ ; CHECK: successors: %bb.4(0x20000000), %bb.2(0x20000000), %bb.5(0x20000000), %bb.6(0x20000000)
+ ; CHECK: liveins: $r1
+ ; CHECK: renamable $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r2 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r2 = t2ADDrs killed renamable $r2, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: t2BR_JT killed renamable $r2, killed renamable $r1, %jump-table.0
+ ; CHECK: bb.2.bb2:
+ ; CHECK: t2BTI
+ ; CHECK: renamable $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+ ; CHECK: bb.3.sw.epilog:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+ ; CHECK: bb.4.return:
+ ; CHECK: liveins: $r0
+ ; CHECK: t2BTI
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+ ; CHECK: bb.5.bb3:
+ ; CHECK: t2BTI
+ ; CHECK: renamable $r0, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+ ; CHECK: bb.6.bb4:
+ ; CHECK: t2BTI
+ ; CHECK: renamable $r0, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+ bb.0.entry:
+ successors: %bb.5, %bb.1
+ liveins: $r0
+
+ renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 1, 14 /* CC::al */, $noreg
+ tCMPi8 renamable $r1, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.5, 8 /* CC::hi */, killed $cpsr
+
+ bb.1.entry:
+ successors: %bb.6, %bb.2, %bb.3, %bb.4
+ liveins: $r1
+
+ renamable $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ renamable $r2 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg
+ renamable $r2 = t2ADDrs killed renamable $r2, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
+ t2BR_JT killed renamable $r2, killed renamable $r1, %jump-table.0
+
+ bb.2.bb2:
+ renamable $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+ tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+ bb.5.sw.epilog:
+ renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+
+ bb.6.return:
+ liveins: $r0
+
+ tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+ bb.3.bb3:
+ renamable $r0, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+ tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+ bb.4.bb4:
+ renamable $r0, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+ tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-1.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-1.ll
new file mode 100644
index 0000000..afdfe1c
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-outliner-1.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s
+
+; Check that each outlining candidate and the outlined function are in agreement
+; with regard to whether BTI insertion is enabled or not.
+
+; volatile int a, b, c, d, e, f;
+;
+; int x(int p) {
+; int r = (a + b) / (c + d) * e + f;
+; return r + 1;
+; }
+;
+; __attribute__((target("branch-protection=none")))
+; int y(int p) {
+; int r = (a + b) / (c + d) * e + f;
+; return r + 2;
+; }
+;
+; __attribute__((target("branch-protection=bti")))
+; int z(int p) {
+; int r = (a + b) / (c + d) * e + f;
+; return r + 3;
+; }
+
+@a = hidden global i32 0, align 4
+@b = hidden global i32 0, align 4
+@c = hidden global i32 0, align 4
+@d = hidden global i32 0, align 4
+@e = hidden global i32 0, align 4
+@f = hidden global i32 0, align 4
+
+define hidden i32 @x(i32 %p) local_unnamed_addr #0 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %5 = load volatile i32, i32* @f, align 4
+ %add2 = add nsw i32 %mul, %5
+ %add3 = add nsw i32 %add2, 1
+ ret i32 %add3
+}
+; CHECK-LABEL: x:
+; CHECK-NOT: bti
+; CHECK: bl OUTLINED_FUNCTION_0
+
+define hidden i32 @y(i32 %p) local_unnamed_addr #1 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %5 = load volatile i32, i32* @f, align 4
+ %add2 = add nsw i32 %mul, %5
+ %add3 = add nsw i32 %add2, 2
+ ret i32 %add3
+}
+; CHECK-LABEL: y:
+; CHECK-NOT: bti
+; CHECK: bl OUTLINED_FUNCTION_0
+
+define hidden i32 @z(i32 %p) local_unnamed_addr #2 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %5 = load volatile i32, i32* @f, align 4
+ %add2 = add nsw i32 %mul, %5
+ %add3 = add nsw i32 %add2, 3
+ ret i32 %add3
+}
+; CHECK-LABEL: z:
+; CHECK: bti
+; CHECK-NOT: bl OUTLINED_FUNCTION
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK-NOT: bti
+
+attributes #0 = { minsize nofree norecurse nounwind optsize }
+attributes #1 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="false" }
+attributes #2 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="true" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 0}
diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-2.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-2.ll
new file mode 100644
index 0000000..b0d03d5
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-outliner-2.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s
+
+; See bti-outliner-1.ll
+; The difference is that BTI placement is enabled by default for the entire module.
+
+@a = hidden global i32 0, align 4
+@b = hidden global i32 0, align 4
+@c = hidden global i32 0, align 4
+@d = hidden global i32 0, align 4
+@e = hidden global i32 0, align 4
+@f = hidden global i32 0, align 4
+
+define hidden i32 @x(i32 %p) local_unnamed_addr #0 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %5 = load volatile i32, i32* @f, align 4
+ %add2 = add nsw i32 %mul, %5
+ %add3 = add nsw i32 %add2, 1
+ ret i32 %add3
+}
+; CHECK-LABEL: x:
+; CHECK: bti
+; CHECK: bl OUTLINED_FUNCTION_0
+
+define hidden i32 @y(i32 %p) local_unnamed_addr #1 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %5 = load volatile i32, i32* @f, align 4
+ %add2 = add nsw i32 %mul, %5
+ %add3 = add nsw i32 %add2, 2
+ ret i32 %add3
+}
+; CHECK-LABEL: y:
+; CHECK-NOT: bti
+; CHECK-NOT: bl OUTLINED_FUNCTION
+
+define hidden i32 @z(i32 %p) local_unnamed_addr #2 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %5 = load volatile i32, i32* @f, align 4
+ %add2 = add nsw i32 %mul, %5
+ %add3 = add nsw i32 %add2, 3
+ ret i32 %add3
+}
+; CHECK-LABEL: z:
+; CHECK: bti
+; CHECK: bl OUTLINED_FUNCTION_0
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK: bti
+
+attributes #0 = { minsize nofree norecurse nounwind optsize }
+attributes #1 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="false" }
+attributes #2 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="true" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll
new file mode 100644
index 0000000..3c800ad
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s
+
+; Check an edge case of the outlining costs:
+; outlining occurs in this test but not in `bti-outliner-cost-2.ll`,
+; the only difference being that branch target enforcement is enabled in the
+; latter.
+
+; volatile int a, b, c, d, e;
+;
+; int x(int p) {
+; int r = (a + b) / (c + d) * e;
+; return r + 1;
+; }
+;
+; int y(int p) {
+; int r = (a + b) / (c + d) * e;
+; return r + 2;
+; }
+
+@a = hidden global i32 0, align 4
+@b = hidden global i32 0, align 4
+@c = hidden global i32 0, align 4
+@d = hidden global i32 0, align 4
+@e = hidden global i32 0, align 4
+
+define hidden i32 @x(i32 %p) local_unnamed_addr #0 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %add2 = add nsw i32 %mul, 1
+ ret i32 %add2
+}
+; CHECK-LABEL: x:
+; CHECK: bl OUTLINED_FUNCTION_0
+
+define hidden i32 @y(i32 %p) local_unnamed_addr #0 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %add2 = add nsw i32 %mul, 2
+ ret i32 %add2
+}
+; CHECK-LABEL: y:
+; CHECK: bl OUTLINED_FUNCTION_0
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK-NOT: bti
+
+attributes #0 = { minsize nofree norecurse nounwind optsize }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 0}
diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll
new file mode 100644
index 0000000..2cb6e2c
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s
+
+; See `bti-outliner-cost-1.ll`
+
+@a = hidden global i32 0, align 4
+@b = hidden global i32 0, align 4
+@c = hidden global i32 0, align 4
+@d = hidden global i32 0, align 4
+@e = hidden global i32 0, align 4
+
+define hidden i32 @x(i32 %p) local_unnamed_addr #0 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %add2 = add nsw i32 %mul, 1
+ ret i32 %add2
+}
+; CHECK-LABEL: x:
+; CHECK-NOT: bl OUTLINED_FUNCTION
+
+define hidden i32 @y(i32 %p) local_unnamed_addr #0 {
+entry:
+ %0 = load volatile i32, i32* @a, align 4
+ %1 = load volatile i32, i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %2 = load volatile i32, i32* @c, align 4
+ %3 = load volatile i32, i32* @d, align 4
+ %add1 = add nsw i32 %3, %2
+ %div = sdiv i32 %add, %add1
+ %4 = load volatile i32, i32* @e, align 4
+ %mul = mul nsw i32 %4, %div
+ %add2 = add nsw i32 %mul, 2
+ ret i32 %add2
+}
+; CHECK-LABEL: y:
+; CHECK-NOT: bl OUTLINED_FUNCTION
+
+; CHECK-NOT: OUTLINED_FUNCTION
+
+attributes #0 = { minsize nofree norecurse nounwind optsize }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}