; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64IV
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV32IV

; Tests adapted from AArch64.

; Test prolog sequences for stack probing when vector is involved.

; NOTE(review): the scalable-vector alloca operand types were lost in a
; text-mangling pass ("alloca , align 16" is invalid IR).  They are
; reconstructed below as <vscale x N x float> so that the object sizes match
; the vlenb multiples asserted by the CHECK lines (one <vscale x 4 x float>
; occupies 2 * vlenb bytes) — confirm against the upstream test.

; The space for vector objects needs probing in the general case, because
; the stack adjustment may happen to be too big (i.e. greater than the
; probe size).

define void @f_vector(ptr %out) #0 {
; RV64IV-LABEL: f_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB0_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB0_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB0_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB0_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 4 vectors of stack space.
define void @f4_vector(ptr %out) #0 {
; RV64IV-LABEL: f4_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 3
; RV64IV-NEXT:    .cfi_def_cfa t1, -64
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB1_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB1_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f4_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 3
; RV32IV-NEXT:    .cfi_def_cfa t1, -64
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB1_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB1_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 3
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 16 vectors of stack space.
; The stack adjustment is less than or equal to 16 x 256 = 4096, so
; we can allocate the locals at once.
define void @f16_vector(ptr %out) #0 {
; RV64IV-LABEL: f16_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 5
; RV64IV-NEXT:    .cfi_def_cfa t1, -256
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB2_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB2_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f16_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 5
; RV32IV-NEXT:    .cfi_def_cfa t1, -256
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB2_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB2_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 5
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  ; NOTE(review): alloca types reconstructed from the "32 * vlenb" CHECK
  ; assertions (16 x 2 vlenb) — the originals were garbled to "alloca ,".
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  ret void
}
; As above, but with 17 vectors of stack space.
define void @f17_vector(ptr %out) #0 {
; RV64IV-LABEL: f17_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    li a0, 34
; RV64IV-NEXT:    mul t1, t1, a0
; RV64IV-NEXT:    .cfi_def_cfa t1, -272
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB3_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB3_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 34
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f17_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    li a0, 34
; RV32IV-NEXT:    mul t1, t1, a0
; RV32IV-NEXT:    .cfi_def_cfa t1, -272
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB3_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB3_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    li a1, 34
; RV32IV-NEXT:    mul a0, a0, a1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  ; NOTE(review): alloca types reconstructed from the "34 * vlenb" CHECK
  ; assertions (17 x 2 vlenb) — the originals were garbled to "alloca ,".
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  %vec17 = alloca <vscale x 4 x float>, align 16
  ret void
}

; A vector and a 16-byte fixed size object.
define void @f1_vector_16_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_16_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 16
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB4_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB4_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 16
; RV64IV-NEXT:    addi sp, sp, 16
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_16_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 16
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB4_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB4_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 16
; RV32IV-NEXT:    addi sp, sp, 16
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  %arr = alloca i8, i64 16, align 1
  ret void
}

; A large vector object and a large slot, both of which need probing.
define void @f1_vector_4096_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_4096_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 4096
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 8192
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 12288
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 12304
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 7
; RV64IV-NEXT:    .cfi_def_cfa t1, -1024
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB5_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB5_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 7
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 12304
; RV64IV-NEXT:    lui a0, 3
; RV64IV-NEXT:    addi a0, a0, 16
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_4096_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 4096
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 8192
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 12288
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 12304
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 7
; RV32IV-NEXT:    .cfi_def_cfa t1, -1024
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB5_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB5_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 7
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 12304
; RV32IV-NEXT:    lui a0, 3
; RV32IV-NEXT:    addi a0, a0, 16
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  ; NOTE(review): type reconstructed from "128 * vlenb" / .cfi_def_cfa t1, -1024
  ; (<vscale x 256 x float> = 1024 * vscale bytes = 128 * vlenb) — confirm.
  %vec = alloca <vscale x 256 x float>, align 16
  %arr = alloca i8, i64 12288, align 1
  ret void
}

attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }