aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Campos <victor.campos@arm.com>2024-04-04 12:44:32 +0100
committerGitHub <noreply@github.com>2024-04-04 12:44:32 +0100
commit5ad320abe36357e3290007d3ab353e8637f33720 (patch)
tree18fadf453c3fa048be0e0d83cb324544e3c1653e
parent3871eaba6bd016b229f2d0e4b75e2be3b65e39a7 (diff)
downloadllvm-5ad320abe36357e3290007d3ab353e8637f33720.zip
llvm-5ad320abe36357e3290007d3ab353e8637f33720.tar.gz
llvm-5ad320abe36357e3290007d3ab353e8637f33720.tar.bz2
[ARM][Thumb2] Mark BTI-clearing instructions as scheduling region boundaries (#79173)
Following https://github.com/llvm/llvm-project/pull/68313 this patch extends the idea to M-profile PACBTI. The Machine Scheduler can reorder instructions within a scheduling region depending on the scheduling policy set. If a BTI-clearing instruction happens to partake in one such region, it might be moved around, therefore ending up where it shouldn't. The solution is to mark all BTI-clearing instructions as scheduling region boundaries. This essentially means that they must not be part of any scheduling region, and as consequence never get moved: - PAC - PACBTI - BTI - SG Note that PAC isn't BTI-clearing, but it's replaced by PACBTI late in the compilation pipeline. As far as I know, currently it isn't possible to organically obtain code that's susceptible to the bug: - Instructions that write to SP are region boundaries. PAC seems to always be followed by the pushing of r12 to the stack, so essentially PAC is always by itself in a scheduling region. - CALL_BTI is expanded into a machine instruction bundle. Bundles are unpacked only after the last machine scheduler run. Thus setjmp and BTI can be separated only if someone deliberately run the scheduler once more. - The BTI insertion pass is run late in the pipeline, only after the last machine scheduling has run. So once again it can be reordered only if someone deliberately runs the scheduler again. Nevertheless, one can reasonably argue that we should prevent the bug in spite of the compiler not being able to produce the required conditions for it. If things change, the compiler will be robust against this issue. The tests written for this are contrived: bogus MIR instructions have been added adjacent to the BTI-clearing instructions in order to have them inside non-trivial scheduling regions.
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp19
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.h4
-rw-r--r--llvm/test/CodeGen/ARM/misched-branch-targets.mir166
3 files changed, 189 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 083f25f..fc2834c 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -286,6 +286,25 @@ MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
+bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // BTI clearing instructions shall not take part in scheduling regions as
+ // they must stay in their intended place. Although PAC isn't BTI clearing,
+ // it can be transformed into PACBTI after the pre-RA Machine Scheduling
+ // has taken place, so its movement must also be restricted.
+ switch (MI.getOpcode()) {
+ case ARM::t2BTI:
+ case ARM::t2PAC:
+ case ARM::t2PACBTI:
+ case ARM::t2SG:
+ return true;
+ default:
+ break;
+ }
+ return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, Register DestReg,
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 4bb412f..8915da8 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -68,6 +68,10 @@ public:
unsigned OpIdx1,
unsigned OpIdx2) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/llvm/test/CodeGen/ARM/misched-branch-targets.mir b/llvm/test/CodeGen/ARM/misched-branch-targets.mir
new file mode 100644
index 0000000..b071fbd
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/misched-branch-targets.mir
@@ -0,0 +1,166 @@
+# RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s
+# RUN: llc -o - -run-pass=postmisched %s | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8.1m.main-arm-none-eabi"
+
+ define i32 @foo_bti() #0 {
+ entry:
+ ret i32 0
+ }
+
+ define i32 @foo_pac() #0 {
+ entry:
+ ret i32 0
+ }
+
+ define i32 @foo_pacbti() #0 {
+ entry:
+ ret i32 0
+ }
+
+ define i32 @foo_setjmp() #0 {
+ entry:
+ ret i32 0
+ if.then:
+ ret i32 0
+ }
+
+ define i32 @foo_sg() #0 {
+ entry:
+ ret i32 0
+ }
+
+ declare i32 @setjmp(ptr noundef) #1
+ declare void @longjmp(ptr noundef, i32 noundef) #2
+
+ attributes #0 = { "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" }
+ attributes #1 = { nounwind returns_twice "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" }
+ attributes #2 = { noreturn nounwind "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" }
+
+...
+---
+name: foo_bti
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $r0
+
+ t2BTI
+ renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
+ tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
+
+...
+
+# CHECK-LABEL: name: foo_bti
+# CHECK: body:
+# CHECK-NEXT: bb.0.entry:
+# CHECK-NEXT: liveins: $r0
+# CHECK-NEXT: {{^ +$}}
+# CHECK-NEXT: t2BTI
+
+---
+name: foo_pac
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $r0, $lr, $r12
+
+ frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp
+ renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg
+ $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
+ $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg
+ early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg
+ $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
+ $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
+ t2AUT implicit $r12, implicit $lr, implicit $sp
+ tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...
+
+# CHECK-LABEL: name: foo_pac
+# CHECK: body:
+# CHECK-NEXT: bb.0.entry:
+# CHECK-NEXT: liveins: $r0, $lr, $r12
+# CHECK-NEXT: {{^ +$}}
+# CHECK-NEXT: frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp
+
+---
+name: foo_pacbti
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $r0, $lr, $r12
+
+ frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp
+ renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg
+ $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
+ $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg
+ early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg
+ $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
+ $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
+ t2AUT implicit $r12, implicit $lr, implicit $sp
+ tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...
+
+# CHECK-LABEL: name: foo_pacbti
+# CHECK: body:
+# CHECK-NEXT: bb.0.entry:
+# CHECK-NEXT: liveins: $r0, $lr, $r12
+# CHECK-NEXT: {{^ +$}}
+# CHECK-NEXT: frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp
+
+---
+name: foo_setjmp
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1
+ liveins: $lr
+
+ frame-setup tPUSH 14 /* CC::al */, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+ $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
+ $sp = frame-setup tSUBspi $sp, 40, 14 /* CC::al */, $noreg
+ renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0
+ t2BTI
+ renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg
+ tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2IT 0, 2, implicit-def $itstate
+ renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit $itstate
+ $sp = frame-destroy tADDspi $sp, 40, 0 /* CC::eq */, $cpsr, implicit $itstate
+ frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $r0, implicit $sp, implicit killed $itstate
+
+ bb.1.if.then:
+ renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg
+ renamable $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ tBL 14 /* CC::al */, $noreg, @longjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp
+
+...
+
+# CHECK-LABEL: name: foo_setjmp
+# CHECK: body:
+# CHECK: tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0
+# CHECK-NEXT: t2BTI
+
+---
+name: foo_sg
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $r0
+
+ t2SG 14 /* CC::al */, $noreg
+ renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
+ tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
+
+...
+
+# CHECK-LABEL: name: foo_sg
+# CHECK: body:
+# CHECK-NEXT: bb.0.entry:
+# CHECK-NEXT: liveins: $r0
+# CHECK-NEXT: {{^ +$}}
+# CHECK-NEXT: t2SG