; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I

; Tests copied from AArch64.

; Dynamically-sized allocation, needs a loop which can handle any size at
; runtime. The final iteration of the loop will temporarily put SP below the
; target address, but this doesn't break any of the ABI constraints on the
; stack, and also doesn't probe below the target SP value.
define void @dynamic(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB0_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB0_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; This function has a fixed-size stack slot and a dynamic one. The fixed-size
; slot isn't large enough that we would normally probe it, but we need to do
; so here, otherwise the gap between the CSR save and the first probe of the
; dynamic allocation could be too large when the size of the dynamic
; allocation is close to the guard size.
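; Throughout these tests the probe interval is one guard page: "lui <reg>, 1"
; materializes 1 << 12 = 4096, so each probe-loop iteration moves SP down by
; 4 KiB and touches the new page with a zero store. This assumes the default
; probe size, as no "stack-probe-size" attribute is set in this file.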
define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; RV64I-LABEL: dynamic_fixed:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -80
; RV64I-NEXT:    .cfi_def_cfa_offset 80
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 80
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    addi a3, s0, -80
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    sd a3, 0(a1)
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a1, 1
; RV64I-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a1
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB1_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a2)
; RV64I-NEXT:    addi sp, s0, -80
; RV64I-NEXT:    .cfi_def_cfa sp, 80
; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 80
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_fixed:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -80
; RV32I-NEXT:    .cfi_def_cfa_offset 80
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 80
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    addi a1, s0, -72
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    sw a1, 0(a2)
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB1_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a3)
; RV32I-NEXT:    addi sp, s0, -80
; RV32I-NEXT:    .cfi_def_cfa sp, 80
; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 80
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v1 = alloca i8, i64 64, align 1
  store ptr %v1, ptr %out1, align 8
  %v2 = alloca i8, i64 %size, align 1
  store ptr %v2, ptr %out2, align 8
  ret void
}

; Dynamic allocation, with an alignment requirement greater than the alignment
; of SP. Done by ANDing the target SP with a constant to align it down, then
; doing the loop as normal. Note that we also re-align the stack in the
; prologue, which isn't actually needed here because the only aligned
; allocations are dynamic; this happens even without stack probing.
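; For the 64-byte case below, the align-down is a single AND with -64 (all
; ones except the low six bits): "andi a0, a0, -64" rounds the 16-byte-aligned
; target SP down to the next 64-byte boundary. Rounding down rather than up
; is what keeps the returned pointer inside the allocation on a
; downward-growing stack.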
define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_align_64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -64
; RV64I-NEXT:    .cfi_def_cfa_offset 64
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    addi s0, sp, 64
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    andi sp, sp, -64
; RV64I-NEXT:    mv s1, sp
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    andi a0, a0, -64
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB2_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -64
; RV64I-NEXT:    .cfi_def_cfa sp, 64
; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 64
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_align_64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -64
; RV32I-NEXT:    .cfi_def_cfa_offset 64
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    addi s0, sp, 64
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    andi sp, sp, -64
; RV32I-NEXT:    mv s1, sp
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    andi a0, a0, -64
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB2_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -64
; RV32I-NEXT:    .cfi_def_cfa sp, 64
; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 64
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 64
  store ptr %v, ptr %out, align 8
  ret void
}

; Dynamic allocation, with an alignment greater than the stack guard size. The
; only difference from the plain dynamic allocation is the constant used for
; aligning the target SP; the loop will probe the whole allocation without
; needing to know about the alignment padding.
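; The realignment padding below can span more than one guard page, so the
; prologue probes it with an unrolled sequence: three 4096-byte SP decrements,
; each followed by a zero store, plus a probed 2064-byte remainder. Only then
; is SP aligned down to an 8192-byte boundary with the srli/slli pair
; (shifting by 13, since 1 << 13 == 8192).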
define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_align_8192:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -2032
; RV64I-NEXT:    .cfi_def_cfa_offset 2032
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    addi s0, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    addi sp, sp, -2048
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    srli a2, sp, 13
; RV64I-NEXT:    slli sp, a2, 13
; RV64I-NEXT:    mv s1, sp
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    lui a2, 1048574
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB3_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -2032
; RV64I-NEXT:    .cfi_def_cfa sp, 2032
; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_align_8192:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -2032
; RV32I-NEXT:    .cfi_def_cfa_offset 2032
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 2020(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    addi s0, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    addi sp, sp, -2048
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    srli a1, sp, 13
; RV32I-NEXT:    slli sp, a1, 13
; RV32I-NEXT:    mv s1, sp
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    lui a1, 1048574
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB3_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -2032
; RV32I-NEXT:    .cfi_def_cfa sp, 2032
; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; If a function has variable-sized stack objects, then any function calls which
; need to pass arguments on the stack must allocate the stack space for them
; dynamically, to ensure they are at the bottom of the frame.
define void @no_reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: no_reserved_call_frame:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a1, 1
; RV64I-NEXT:  .LBB4_1: # %entry
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a1
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB4_1
; RV64I-NEXT:  # %bb.2: # %entry
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    lui a1, 1
; RV64I-NEXT:    sub sp, sp, a1
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    call callee_stack_args
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    add sp, sp, a0
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: no_reserved_call_frame:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    slli a0, a0, 2
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB4_1: # %entry
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB4_1
; RV32I-NEXT:  # %bb.2: # %entry
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    call callee_stack_args
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, 32
; RV32I-NEXT:    add sp, sp, a0
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
entry:
  %v = alloca i32, i64 %n
  call void @callee_stack_args(ptr %v, [518 x i64] poison)
  ret void
}

; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
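; The [518 x i64] argument occupies 518 * 8 = 4144 bytes, almost all of it
; passed on the stack, so the fixed frame (CSR area plus outgoing arguments)
; exceeds one guard page. That is why the prologue below still emits a single
; probe (the zero store right after the "lui a0, 1; sub sp, sp, a0"
; decrement) even though no probe loop is needed for a fixed-size frame.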
define void @reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: reserved_call_frame:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -2032
; RV64I-NEXT:    .cfi_def_cfa_offset 2032
; RV64I-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    sub sp, sp, a0
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    .cfi_def_cfa_offset 4096
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 4144
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    add a0, sp, a0
; RV64I-NEXT:    call callee_stack_args
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addi a0, a0, 48
; RV64I-NEXT:    add sp, sp, a0
; RV64I-NEXT:    .cfi_def_cfa_offset 2032
; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    addi sp, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: reserved_call_frame:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -2032
; RV32I-NEXT:    .cfi_def_cfa_offset 2032
; RV32I-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    sub sp, sp, a0
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    .cfi_def_cfa_offset 4096
; RV32I-NEXT:    addi sp, sp, -80
; RV32I-NEXT:    .cfi_def_cfa_offset 4176
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, 36
; RV32I-NEXT:    add a0, sp, a0
; RV32I-NEXT:    call callee_stack_args
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, 80
; RV32I-NEXT:    add sp, sp, a0
; RV32I-NEXT:    .cfi_def_cfa_offset 2032
; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    addi sp, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
entry:
  %v = alloca i32, i64 518
  call void @callee_stack_args(ptr %v, [518 x i64] poison)
  ret void
}

declare void @callee_stack_args(ptr, [518 x i64])

; Dynamic allocation of scalable vectors
define void @dynamic_vector(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_vector:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    csrr a2, vlenb
; RV64I-NEXT:    mul a0, a2, a0
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB6_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_vector:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    csrr a1, vlenb
; RV32I-NEXT:    mul a0, a1, a0
; RV32I-NEXT:    slli a0, a0, 1
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB6_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca <vscale x 4 x float>, i64 %size, align 16
  store ptr %v, ptr %out, align 8
  ret void
}

attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }
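; "probe-stack"="inline-asm" selects inline stack probes (rather than a call
; to a probe function), which is what produces the zero stores and probe
; loops checked above. uwtable(async) requests asynchronous unwind tables,
; which is why the .cfi_* directives are part of the checked output, and
; "frame-pointer"="none" still permits s0 as a frame pointer in the functions
; where dynamic stack adjustment makes one mandatory.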