aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2017-11-14 20:40:11 +0000
committerTom Stellard <tstellar@redhat.com>2017-11-14 20:40:11 +0000
commitee6a0ad5af8db1dfb7acec34e014a01510214fb4 (patch)
tree9313def9c6c19413e667c10c721488a101bc6a19
parent3547c904e227316ed33a94ee3db5a630ffc22805 (diff)
downloadllvm-ee6a0ad5af8db1dfb7acec34e014a01510214fb4.zip
llvm-ee6a0ad5af8db1dfb7acec34e014a01510214fb4.tar.gz
llvm-ee6a0ad5af8db1dfb7acec34e014a01510214fb4.tar.bz2
Merging r317204 and r318172:
------------------------------------------------------------------------ r317204 | sdardis | 2017-11-02 05:47:22 -0700 (Thu, 02 Nov 2017) | 15 lines [mips] Use register scavenging with MSA. MSA stores and loads to the stack are more likely to require an emergency GPR spill slot due to the smaller offsets available with those instructions. Handle this by overestimating the size of the stack by determining the largest offset presuming that all callee save registers are spilled and accounting of incoming arguments when determining whether an emergency spill slot is required. Reviewers: atanasyan Differential Revision: https://reviews.llvm.org/D39056 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r318172 | sdardis | 2017-11-14 11:11:45 -0800 (Tue, 14 Nov 2017) | 5 lines [mips] Simplify test for 5.0.1 (NFC) Simplify testing that an emergency spill slot is used when MSA is used so that it can be included in the 5.0.1 release. ------------------------------------------------------------------------ llvm-svn: 318191
-rw-r--r--llvm/lib/Target/Mips/MipsFrameLowering.cpp35
-rw-r--r--llvm/lib/Target/Mips/MipsSEFrameLowering.cpp8
-rw-r--r--llvm/test/CodeGen/Mips/msa/emergency-spill.mir212
-rw-r--r--llvm/test/CodeGen/Mips/msa/frameindex.ll49
4 files changed, 263 insertions, 41 deletions
diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/llvm/lib/Target/Mips/MipsFrameLowering.cpp
index ef05166..27a8597 100644
--- a/llvm/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
}
+// Estimate the size of the stack, including the incoming arguments. We need to
+// account for register spills, local objects, reserved call frame and incoming
+// arguments. This is required to determine the largest possible positive offset
+// from $sp so that it can be determined if an emergency spill slot for stack
+// addresses is required.
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- int64_t Offset = 0;
+ int64_t Size = 0;
- // Iterate over fixed sized objects.
+ // Iterate over fixed sized objects which are incoming arguments.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- Offset = std::max(Offset, -MFI.getObjectOffset(I));
+ if (MFI.getObjectOffset(I) > 0)
+ Size += MFI.getObjectSize(I);
// Conservatively assume all callee-saved registers will be saved.
for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
- unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
- Offset = alignTo(Offset + Size, Size);
+ unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
+ Size = alignTo(Size + RegSize, RegSize);
}
- unsigned MaxAlign = MFI.getMaxAlignment();
-
- // Check that MaxAlign is not zero if there is a stack object that is not a
- // callee-saved spill.
- assert(!MFI.getObjectIndexEnd() || MaxAlign);
-
- // Iterate over other objects.
- for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
- Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign);
-
- // Call frame.
- if (MFI.adjustsStack() && hasReservedCallFrame(MF))
- Offset = alignTo(Offset + MFI.getMaxCallFrameSize(),
- std::max(MaxAlign, getStackAlignment()));
-
- return alignTo(Offset, getStackAlignment());
+ // Get the size of the rest of the frame objects and any possible reserved
+ // call frame, accounting for alignment.
+ return Size + MFI.estimateStackSize(MF);
}
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 102ebb2..735461c 100644
--- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -894,10 +894,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// Set scavenging frame index if necessary.
- uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
- estimateStackSize(MF);
+ uint64_t MaxSPOffset = estimateStackSize(MF);
- if (isInt<16>(MaxSPOffset))
+ // MSA has a minimum offset of 10 bits signed. If there is a variable
+ // sized object on the stack, the estimation cannot account for it.
+ if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) &&
+ !MF.getFrameInfo().hasVarSizedObjects())
return;
const TargetRegisterClass &RC =
diff --git a/llvm/test/CodeGen/Mips/msa/emergency-spill.mir b/llvm/test/CodeGen/Mips/msa/emergency-spill.mir
new file mode 100644
index 0000000..a53368e
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/msa/emergency-spill.mir
@@ -0,0 +1,212 @@
+# RUN: llc %s -start-after=shrink-wrap -march=mips64 -mcpu=mips64r6 -mattr=+fp64,+msa -o /dev/null
+
+# Test that estimated size of the stack leads to the creation of an emergency
+# spill when MSA is in use. Previously, this test case would fail during
+# register scavenging due to the lack of a spill slot.
+--- |
+ define inreg { i64, i64 } @test(i64 inreg %a.coerce0, i64 inreg %a.coerce1, i64 inreg %b.coerce0, i64 inreg %b.coerce1, i32 signext %c) #0 {
+ entry:
+ %retval = alloca <16 x i8>, align 16
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ %a.addr = alloca <16 x i8>, align 16
+ %b.addr = alloca <16 x i8>, align 16
+ %c.addr = alloca i32, align 4
+ %g = alloca <16 x i8>*, align 8
+ %d = alloca i8*, align 8
+ %0 = bitcast <16 x i8>* %a to { i64, i64 }*
+ %1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 0
+ store i64 %a.coerce0, i64* %1, align 16
+ %2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 1
+ store i64 %a.coerce1, i64* %2, align 8
+ %a1 = load <16 x i8>, <16 x i8>* %a, align 16
+ %3 = bitcast <16 x i8>* %b to { i64, i64 }*
+ %4 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 0
+ store i64 %b.coerce0, i64* %4, align 16
+ %5 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 1
+ store i64 %b.coerce1, i64* %5, align 8
+ %b2 = load <16 x i8>, <16 x i8>* %b, align 16
+ store <16 x i8> %a1, <16 x i8>* %a.addr, align 16
+ store <16 x i8> %b2, <16 x i8>* %b.addr, align 16
+ store i32 %c, i32* %c.addr, align 4
+ %6 = alloca i8, i64 6400, align 16
+ %7 = bitcast i8* %6 to <16 x i8>*
+ store <16 x i8>* %7, <16 x i8>** %g, align 8
+ %8 = load <16 x i8>*, <16 x i8>** %g, align 8
+ call void @h(<16 x i8>* %b.addr, <16 x i8>* %8)
+ %9 = load <16 x i8>*, <16 x i8>** %g, align 8
+ %10 = bitcast <16 x i8>* %9 to i8*
+ store i8* %10, i8** %d, align 8
+ %11 = load <16 x i8>, <16 x i8>* %a.addr, align 16
+ %12 = load i8*, i8** %d, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %12, i64 0
+ %13 = load i8, i8* %arrayidx, align 1
+ %conv = sext i8 %13 to i32
+ %14 = call <16 x i8> @llvm.mips.fill.b(i32 %conv)
+ %add = add <16 x i8> %11, %14
+ %15 = load i8*, i8** %d, align 8
+ %arrayidx3 = getelementptr inbounds i8, i8* %15, i64 1
+ %16 = load i8, i8* %arrayidx3, align 1
+ %conv4 = sext i8 %16 to i32
+ %17 = call <16 x i8> @llvm.mips.fill.b(i32 %conv4)
+ %add5 = add <16 x i8> %add, %17
+ %18 = load <16 x i8>, <16 x i8>* %b.addr, align 16
+ %add6 = add <16 x i8> %18, %add5
+ store <16 x i8> %add6, <16 x i8>* %b.addr, align 16
+ %19 = load <16 x i8>, <16 x i8>* %b.addr, align 16
+ store <16 x i8> %19, <16 x i8>* %retval, align 16
+ %20 = bitcast <16 x i8>* %retval to { i64, i64 }*
+ %21 = load { i64, i64 }, { i64, i64 }* %20, align 16
+ ret { i64, i64 } %21
+ }
+
+ declare void @h(<16 x i8>*, <16 x i8>*)
+
+ declare <16 x i8> @llvm.mips.fill.b(i32)
+
+ declare void @llvm.stackprotector(i8*, i8**)
+
+...
+---
+name: test
+alignment: 3
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+liveins:
+ - { reg: '%a0_64', virtual-reg: '' }
+ - { reg: '%a1_64', virtual-reg: '' }
+ - { reg: '%a2_64', virtual-reg: '' }
+ - { reg: '%a3_64', virtual-reg: '' }
+ - { reg: '%t0_64', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 16
+ adjustsStack: false
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+ - { id: 0, name: retval, type: default, offset: 0, size: 16, alignment: 16,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 1, name: a, type: default, offset: 0, size: 16, alignment: 16,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 2, name: b, type: default, offset: 0, size: 16, alignment: 16,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 3, name: a.addr, type: default, offset: 0, size: 16, alignment: 16,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 4, name: b.addr, type: default, offset: 0, size: 16, alignment: 16,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 5, name: c.addr, type: default, offset: 0, size: 4, alignment: 4,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 6, name: g, type: default, offset: 0, size: 8, alignment: 8,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 7, name: d, type: default, offset: 0, size: 8, alignment: 8,
+ callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+ - { id: 8, name: '', type: default, offset: 0, size: 6400,
+ alignment: 16, callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+constants:
+body: |
+ bb.0.entry:
+ liveins: %a0_64, %a1_64, %a2_64, %a3_64, %t0_64
+
+ SD killed %a0_64, %stack.1.a, 0 :: (store 8 into %ir.1, align 16)
+ SD killed %a1_64, %stack.1.a, 8 :: (store 8 into %ir.2)
+ %w0 = LD_B %stack.1.a, 0 :: (dereferenceable load 16 from %ir.a)
+ SD killed %a2_64, %stack.2.b, 0 :: (store 8 into %ir.4, align 16)
+ SD killed %a3_64, %stack.2.b, 8 :: (store 8 into %ir.5)
+ %w1 = LD_B %stack.2.b, 0 :: (dereferenceable load 16 from %ir.b)
+ ST_B killed %w0, %stack.3.a.addr, 0 :: (store 16 into %ir.a.addr)
+ ST_B killed %w1, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr)
+ SW %t0, %stack.5.c.addr, 0, implicit killed %t0_64 :: (store 4 into %ir.c.addr)
+ %at_64 = LEA_ADDiu64 %stack.8, 0
+ SD killed %at_64, %stack.6.g, 0 :: (store 8 into %ir.g)
+ %a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+ %a0_64 = LEA_ADDiu64 %stack.4.b.addr, 0
+ JAL @h, csr_n64, implicit-def dead %ra, implicit %a0_64, implicit %a1_64, implicit-def %sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+ %at_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %v0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %v1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %a0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %a2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %a3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %s7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t8_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %t9_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %ra_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+ %w0 = LD_B %stack.3.a.addr, 0 :: (dereferenceable load 16 from %ir.a.addr)
+ SD %at_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %v0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %v1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %a0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %a1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %a2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %a3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t4_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t5_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t6_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t7_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s4_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s5_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s6_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %s7_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t8_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %t9_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ SD %ra_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+ %at_64 = LD %stack.7.d, 0 :: (dereferenceable load 8 from %ir.d)
+ %v0 = LB %at_64, 0 :: (load 1 from %ir.arrayidx)
+ %w1 = FILL_B killed %v0
+ %w0 = ADDV_B killed %w0, killed %w1
+ %at = LB killed %at_64, 1 :: (load 1 from %ir.arrayidx3)
+ %w1 = FILL_B killed %at
+ %w0 = ADDV_B killed %w0, killed %w1
+ %w1 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr)
+ %w0 = ADDV_B killed %w1, killed %w0
+ ST_B killed %w0, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr)
+ %w0 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr)
+ ST_B killed %w0, %stack.0.retval, 0 :: (store 16 into %ir.retval)
+ %v0_64 = LD %stack.0.retval, 0 :: (dereferenceable load 8 from %ir.20, align 16)
+ %v1_64 = LD %stack.0.retval, 8 :: (dereferenceable load 8 from %ir.20 + 8, align 16)
+ RetRA implicit %v0_64, implicit %v1_64
+
+...
diff --git a/llvm/test/CodeGen/Mips/msa/frameindex.ll b/llvm/test/CodeGen/Mips/msa/frameindex.ll
index f903381..9c2228d 100644
--- a/llvm/test/CodeGen/Mips/msa/frameindex.ll
+++ b/llvm/test/CodeGen/Mips/msa/frameindex.ll
@@ -18,7 +18,8 @@ define void @loadstore_v16i8_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm10:
%1 = alloca <16 x i8>
- %2 = alloca [496 x i8] ; Push the frame right up to 512 bytes
+ %2 = alloca [492 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 512 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
@@ -33,7 +34,8 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm10:
%1 = alloca <16 x i8>
- %2 = alloca [497 x i8] ; Push the frame just over 512 bytes
+ %2 = alloca [497 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 512 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
@@ -50,7 +52,8 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm16:
%1 = alloca <16 x i8>
- %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+ %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 32768 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -69,7 +72,8 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm16:
%1 = alloca <16 x i8>
- %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+ %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 32768 bytes
%3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -121,7 +125,8 @@ define void @loadstore_v8i16_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v8i16_just_under_simm10:
%1 = alloca <8 x i16>
- %2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes
+ %2 = alloca [1004 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 1024 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
@@ -136,7 +141,8 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v8i16_just_over_simm10:
%1 = alloca <8 x i16>
- %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
+ %2 = alloca [1009 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 1024 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
@@ -153,7 +159,8 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v8i16_just_under_simm16:
%1 = alloca <8 x i16>
- %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+ %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 32768 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -172,7 +179,8 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v8i16_just_over_simm16:
%1 = alloca <8 x i16>
- %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+ %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 32768 bytes
%3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -224,7 +232,8 @@ define void @loadstore_v4i32_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v4i32_just_under_simm10:
%1 = alloca <4 x i32>
- %2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes
+ %2 = alloca [2028 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 2048 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
@@ -239,7 +248,8 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v4i32_just_over_simm10:
%1 = alloca <4 x i32>
- %2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
+ %2 = alloca [2033 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 2048 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
@@ -256,7 +266,8 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v4i32_just_under_simm16:
%1 = alloca <4 x i32>
- %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+ %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 32768 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -275,7 +286,8 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v4i32_just_over_simm16:
%1 = alloca <4 x i32>
- %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+ %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 32768 bytes
%3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -327,8 +339,8 @@ define void @loadstore_v2i64_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v2i64_just_under_simm10:
%1 = alloca <2 x i64>
- %2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes
-
+ %2 = alloca [4076 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 4096 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
store volatile <2 x i64> %3, <2 x i64>* %1
@@ -342,7 +354,8 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v2i64_just_over_simm10:
%1 = alloca <2 x i64>
- %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
+ %2 = alloca [4081 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 4096 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
@@ -359,7 +372,8 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v2i64_just_under_simm16:
%1 = alloca <2 x i64>
- %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+ %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--right up to 32768 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -378,7 +392,8 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v2i64_just_over_simm16:
%1 = alloca <2 x i64>
- %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+ %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+ ; slot--just over 32768 bytes
%3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768