1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
; NOTE(review): the llc invocations at the top of this file pass
; -mtriple=nvptx, while the module declares a 64-bit triple here -- confirm
; the difference is intentional.
target triple = "nvptx64-nvidia-cuda"
; Two-float aggregate that every function in this file passes or returns
; directly as a value.
%struct.float2 = type { float, float }
; Expected prototype (terminated by `;`) for @callee_md: the struct parameter
; is declared as an 8-byte .b8 array carrying `.align 8`.
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md
; CHECK-NEXT: (
; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: ;
; Expected prototype for @callee: same shape as the @callee_md prototype.
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee
; CHECK-NEXT: (
; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: ;
; @caller_md: packs its two float arguments into a %struct.float2, passes the
; struct to @callee_md and returns the callee's float result.
; The expected PTX declares the outgoing param0 with `.align 8` and fills it
; with a single st.param.v2.b32 store covering both fields.
define float @caller_md(float %a, float %b) {
; CHECK-LABEL: caller_md(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [caller_md_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: call.uni (retval0), callee_md, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
; Build the struct {%a, %b} one field at a time, starting from poison.
%s1 = insertvalue %struct.float2 poison, float %a, 0
%s2 = insertvalue %struct.float2 %s1, float %b, 1
%r = call float @callee_md(%struct.float2 %s2)
ret float %r
}
; @callee_md: returns the sum of the two fields of its %struct.float2
; argument, which is marked alignstack(8).
; The expected PTX reads both fields with one ld.param.v2.b32.
; NOTE(review): apart from the name, the signature and body are identical to
; @callee; the `_md` suffix presumably refers to an earlier metadata-based
; alignment annotation -- confirm both variants are still needed.
define float @callee_md(%struct.float2 alignstack(8) %a) {
; CHECK-LABEL: callee_md(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
; Split the struct into its two float fields and add them.
%v0 = extractvalue %struct.float2 %a, 0
%v1 = extractvalue %struct.float2 %a, 1
%2 = fadd float %v0, %v1
ret float %2
}
; @caller: packs its two float arguments into a %struct.float2, passes the
; struct to @callee and returns the callee's float result.
; The expected PTX declares the outgoing param0 with `.align 8` and fills it
; with a single st.param.v2.b32 store covering both fields.
define float @caller(float %a, float %b) {
; CHECK-LABEL: caller(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [caller_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
; Build the struct {%a, %b} one field at a time, starting from poison.
%s1 = insertvalue %struct.float2 poison, float %a, 0
%s2 = insertvalue %struct.float2 %s1, float %b, 1
%r = call float @callee(%struct.float2 %s2)
ret float %r
}
; @callee: returns the sum of the two fields of its %struct.float2 argument,
; which is marked alignstack(8).
; The expected PTX reads both fields with one ld.param.v2.b32.
define float @callee(%struct.float2 alignstack(8) %a ) {
; CHECK-LABEL: callee(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
; Split the struct into its two float fields and add them.
%v0 = extractvalue %struct.float2 %a, 0
%v1 = extractvalue %struct.float2 %a, 1
%2 = fadd float %v0, %v1
ret float %2
}
; @aligned_return: returns its %struct.float2 argument unchanged. Only the
; return value is marked alignstack(8); the parameter has no alignment
; attribute.
; The expected PTX moves the two fields with separate scalar b32 loads and
; stores (offsets 0 and 4) rather than a v2 access.
define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) {
; CHECK-LABEL: aligned_return(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0+4];
; CHECK-NEXT: st.param.b32 [func_retval0+4], %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
ret %struct.float2 %a
}
|