1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx72 -mcpu=sm_52 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_50 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.
; Dynamic alloca whose result escapes to a call.
; Allocates %n elements of i8 with 16-byte alignment, passes the resulting
; pointer to @bar, and returns @bar's i32 result.
; The expected PTX below rounds the byte count up to a multiple of 8
; (add 7, then mask with -8), emits an `alloca` with alignment 16, and converts
; the result with `cvta.local` before storing it as the call argument.
; In the 32-bit expectations the i64 parameter is read with a 32-bit ld.param.
define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-LABEL: test_dynamic_stackalloc(
; CHECK-32: {
; CHECK-32-NEXT: .reg .b32 %r<8>;
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_param_0];
; CHECK-32-NEXT: add.s32 %r2, %r1, 7;
; CHECK-32-NEXT: and.b32 %r3, %r2, -8;
; CHECK-32-NEXT: alloca.u32 %r4, %r3, 16;
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
; CHECK-32-NEXT: { // callseq 0, 0
; CHECK-32-NEXT: .param .b32 param0;
; CHECK-32-NEXT: .param .b32 retval0;
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-32-NEXT: ret;
;
; CHECK-64-LABEL: test_dynamic_stackalloc(
; CHECK-64: {
; CHECK-64-NEXT: .reg .b32 %r<3>;
; CHECK-64-NEXT: .reg .b64 %rd<6>;
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_param_0];
; CHECK-64-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-64-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-64-NEXT: alloca.u64 %rd4, %rd3, 16;
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-64-NEXT: { // callseq 0, 0
; CHECK-64-NEXT: .param .b64 param0;
; CHECK-64-NEXT: .param .b32 retval0;
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-64-NEXT: ret;
; Runtime-sized stack allocation: %n x i8, align 16.
%alloca = alloca i8, i64 %n, align 16
; Hand the allocation to an external callee so the pointer escapes this function.
%call = call i32 @bar(ptr %alloca)
ret i32 %call
}
; Dynamic alloca of %0 floats with only 4-byte alignment requested; the first
; element is loaded and returned, and the pointer is never passed to a call.
; The expected PTX below scales the element count by 4 (shl by 2), rounds the
; byte size up to a multiple of 8, and emits an `alloca` with alignment 8.
; The load is an `ld.local` directly from the alloca result, with no
; `cvta.local` conversion.
; NOTE(review): the IR requests align 4 but the expected `alloca` uses 8 --
; presumably a minimum alignment applied by the backend; confirm against the
; NVPTX lowering code.
define float @test_dynamic_stackalloc_unaligned(i64 %0) {
; CHECK-32-LABEL: test_dynamic_stackalloc_unaligned(
; CHECK-32: {
; CHECK-32-NEXT: .reg .b32 %r<7>;
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_unaligned_param_0];
; CHECK-32-NEXT: shl.b32 %r2, %r1, 2;
; CHECK-32-NEXT: add.s32 %r3, %r2, 7;
; CHECK-32-NEXT: and.b32 %r4, %r3, -8;
; CHECK-32-NEXT: alloca.u32 %r5, %r4, 8;
; CHECK-32-NEXT: ld.local.b32 %r6, [%r5];
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-32-NEXT: ret;
;
; CHECK-64-LABEL: test_dynamic_stackalloc_unaligned(
; CHECK-64: {
; CHECK-64-NEXT: .reg .b32 %r<2>;
; CHECK-64-NEXT: .reg .b64 %rd<6>;
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_unaligned_param_0];
; CHECK-64-NEXT: shl.b64 %rd2, %rd1, 2;
; CHECK-64-NEXT: add.s64 %rd3, %rd2, 7;
; CHECK-64-NEXT: and.b64 %rd4, %rd3, -8;
; CHECK-64-NEXT: alloca.u64 %rd5, %rd4, 8;
; CHECK-64-NEXT: ld.local.b32 %r1, [%rd5];
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-64-NEXT: ret;
; Runtime-sized stack allocation: %0 x float, align 4.
%4 = alloca float, i64 %0, align 4
; Index 0, so this addresses the first float of the allocation.
%5 = getelementptr float, ptr %4, i64 0
%6 = load float, ptr %5, align 4
ret float %6
}
; External function taking a pointer and returning i32; called by
; @test_dynamic_stackalloc with the dynamically allocated buffer.
declare i32 @bar(ptr)
|