; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
;
; Tests NVPTX lowering of dynamically sized allocas (an alloca whose element
; count is a runtime value) for both the 32-bit and the 64-bit triple.
;
; Negative runs: llc is expected to fail (note the leading "not") when either
; the PTX version is 7.2 or the target is sm_50. The diagnostic that is
; printed on stderr is matched with the CHECK-FAILS prefix.
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx72 -mcpu=sm_52 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_50 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS

; Positive runs: with PTX 7.3 and sm_52 the file must compile; the output is
; matched with the CHECK-32 prefix for nvptx and the CHECK-64 prefix for nvptx64.
; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64
; The generated PTX is additionally fed to ptxas when the ptxas lit feature is
; available. The 32-bit run is skipped when the ptxas-12.0 feature is set
; (NOTE(review): presumably because that ptxas release rejects 32-bit
; targets - confirm).
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}

; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.

; Allocates %n bytes (i8 elements) with 16-byte alignment and passes the
; resulting pointer to the external function @bar, returning @bar's result.
; The expected PTX rounds the size up to a multiple of 8 (add 7, mask with -8),
; emits an alloca with an alignment operand of 16, and applies cvta.local to
; the alloca result before it is stored as the call argument.
define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-LABEL: test_dynamic_stackalloc(
; CHECK-32: {
; CHECK-32-NEXT: .reg .b32 %r<8>;
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_param_0];
; CHECK-32-NEXT: add.s32 %r2, %r1, 7;
; CHECK-32-NEXT: and.b32 %r3, %r2, -8;
; CHECK-32-NEXT: alloca.u32 %r4, %r3, 16;
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
; CHECK-32-NEXT: { // callseq 0, 0
; CHECK-32-NEXT: .param .b32 param0;
; CHECK-32-NEXT: .param .b32 retval0;
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-32-NEXT: ret;
;
; CHECK-64-LABEL: test_dynamic_stackalloc(
; CHECK-64: {
; CHECK-64-NEXT: .reg .b32 %r<3>;
; CHECK-64-NEXT: .reg .b64 %rd<6>;
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_param_0];
; CHECK-64-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-64-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-64-NEXT: alloca.u64 %rd4, %rd3, 16;
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-64-NEXT: { // callseq 0, 0
; CHECK-64-NEXT: .param .b64 param0;
; CHECK-64-NEXT: .param .b32 retval0;
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-64-NEXT: ret;
  %alloca = alloca i8, i64 %n, align 16
  %call = call i32 @bar(ptr %alloca)
  ret i32 %call
}

; Allocates %0 float elements with only 4-byte alignment and returns the first
; element, read through a zero-index getelementptr. The expected PTX scales the
; element count by 4 (shl by 2), rounds the byte size up to a multiple of 8,
; emits an alloca with an alignment operand of 8 although the IR requests
; align 4, and loads straight from the alloca result with ld.local (there is
; no cvta.local in this function).
define float @test_dynamic_stackalloc_unaligned(i64 %0) {
; CHECK-32-LABEL: test_dynamic_stackalloc_unaligned(
; CHECK-32: {
; CHECK-32-NEXT: .reg .b32 %r<7>;
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_unaligned_param_0];
; CHECK-32-NEXT: shl.b32 %r2, %r1, 2;
; CHECK-32-NEXT: add.s32 %r3, %r2, 7;
; CHECK-32-NEXT: and.b32 %r4, %r3, -8;
; CHECK-32-NEXT: alloca.u32 %r5, %r4, 8;
; CHECK-32-NEXT: ld.local.b32 %r6, [%r5];
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-32-NEXT: ret;
;
; CHECK-64-LABEL: test_dynamic_stackalloc_unaligned(
; CHECK-64: {
; CHECK-64-NEXT: .reg .b32 %r<2>;
; CHECK-64-NEXT: .reg .b64 %rd<6>;
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_unaligned_param_0];
; CHECK-64-NEXT: shl.b64 %rd2, %rd1, 2;
; CHECK-64-NEXT: add.s64 %rd3, %rd2, 7;
; CHECK-64-NEXT: and.b64 %rd4, %rd3, -8;
; CHECK-64-NEXT: alloca.u64 %rd5, %rd4, 8;
; CHECK-64-NEXT: ld.local.b32 %r1, [%rd5];
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-64-NEXT: ret;
  %4 = alloca float, i64 %0, align 4
  %5 = getelementptr float, ptr %4, i64 0
  %6 = load float, ptr %5, align 4
  ret float %6
}

; External callee that receives the dynamically allocated buffer in
; @test_dynamic_stackalloc; only its declaration is needed here.
declare i32 @bar(ptr)