1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Check that various LLVM idioms get lowered to NVPTX as expected.
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; Two-field aggregates used as return types by the integer-splitting tests
; below: S16 holds a pair of i16 values, S32 a pair of i32 values.
%struct.S16 = type { i16, i16 }
%struct.S32 = type { i32, i32 }
; Open-coded absolute value on i16, written as select(a >= 0, a, 0 - a).
; The assertions expect the whole sub/icmp/select sequence to become a single
; abs.s16, with the 16-bit result widened via cvt.u32.u16 before it is stored
; to the 32-bit return slot.
define i16 @abs_i16(i16 %a) {
; CHECK-LABEL: abs_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [abs_i16_param_0];
; CHECK-NEXT: abs.s16 %rs2, %rs1;
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%neg = sub i16 0, %a
%abs.cond = icmp sge i16 %a, 0
%abs = select i1 %abs.cond, i16 %a, i16 %neg
ret i16 %abs
}
; Open-coded absolute value on i32, written as select(a >= 0, a, 0 - a).
; The assertions expect the sub/icmp/select sequence to become a single
; abs.s32 whose result is stored directly to the return slot.
define i32 @abs_i32(i32 %a) {
; CHECK-LABEL: abs_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [abs_i32_param_0];
; CHECK-NEXT: abs.s32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%neg = sub i32 0, %a
%abs.cond = icmp sge i32 %a, 0
%abs = select i1 %abs.cond, i32 %a, i32 %neg
ret i32 %abs
}
; Open-coded absolute value on i64, written as select(a >= 0, a, 0 - a).
; The assertions expect the sub/icmp/select sequence to become a single
; abs.s64 whose result is stored directly to the 64-bit return slot.
define i64 @abs_i64(i64 %a) {
; CHECK-LABEL: abs_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [abs_i64_param_0];
; CHECK-NEXT: abs.s64 %rd2, %rd1;
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT: ret;
%neg = sub i64 0, %a
%abs.cond = icmp sge i64 %a, 0
%abs = select i1 %abs.cond, i64 %a, i64 %neg
ret i64 %abs
}
; Splits an i32 into its two 16-bit halves and returns them as %struct.S16:
; field 0 is trunc(in), field 1 is trunc(in >> 16) using a logical shift.
; The assertions expect one shr.u32 by 16 followed by two 16-bit stores to the
; return slot, at byte offsets 0 (low half) and 2 (high half).
define %struct.S16 @i32_to_2xi16(i32 noundef %in) {
; CHECK-LABEL: i32_to_2xi16(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_param_0];
; CHECK-NEXT: shr.u32 %r2, %r1, 16;
; CHECK-NEXT: st.param.b16 [func_retval0], %r1;
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2;
; CHECK-NEXT: ret;
%low = trunc i32 %in to i16
%high32 = lshr i32 %in, 16
%high = trunc i32 %high32 to i16
%s1 = insertvalue %struct.S16 poison, i16 %low, 0
%s = insertvalue %struct.S16 %s1, i16 %high, 1
ret %struct.S16 %s
}
; Same as above, but with rearranged order of low/high parts: the high half
; (lshr by 16 + trunc) is computed before the low half (trunc) in the IR.
; The struct fields are still filled as { low, high }, and the expected PTX
; matches i32_to_2xi16 apart from the parameter symbol name.
define %struct.S16 @i32_to_2xi16_lh(i32 noundef %in) {
; CHECK-LABEL: i32_to_2xi16_lh(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_lh_param_0];
; CHECK-NEXT: shr.u32 %r2, %r1, 16;
; CHECK-NEXT: st.param.b16 [func_retval0], %r1;
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2;
; CHECK-NEXT: ret;
%high32 = lshr i32 %in, 16
%high = trunc i32 %high32 to i16
%low = trunc i32 %in to i16
%s1 = insertvalue %struct.S16 poison, i16 %low, 0
%s = insertvalue %struct.S16 %s1, i16 %high, 1
ret %struct.S16 %s
}
; Negative variant of i32_to_2xi16: the second field is taken from a logical
; shift by 15 instead of 16, so the two fields are not the exact halves of the
; input. The assertions expect the shift amount 15 to be preserved in the
; emitted shr.u32.
; NOTE(review): the in-body remark mentions a "conversion to mov", but the
; current assertions for the shift-by-16 case also show shr.u32 rather than a
; mov, so that remark may describe an older lowering -- confirm.
define %struct.S16 @i32_to_2xi16_not(i32 noundef %in) {
; CHECK-LABEL: i32_to_2xi16_not(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_not_param_0];
; CHECK-NEXT: shr.u32 %r2, %r1, 15;
; CHECK-NEXT: st.param.b16 [func_retval0], %r1;
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2;
; CHECK-NEXT: ret;
%low = trunc i32 %in to i16
; Shift by any value other than 16 blocks the conversion to mov.
%high32 = lshr i32 %in, 15
%high = trunc i32 %high32 to i16
%s1 = insertvalue %struct.S16 poison, i16 %low, 0
%s = insertvalue %struct.S16 %s1, i16 %high, 1
ret %struct.S16 %s
}
; Splits an i64 into its two 32-bit halves and returns them as %struct.S32:
; field 0 is trunc(in), field 1 is trunc(in >> 32) using a logical shift.
; The assertions expect one shr.u64 by 32 followed by two 32-bit stores to the
; return slot, at byte offsets 0 (low half) and 4 (high half).
define %struct.S32 @i64_to_2xi32(i64 noundef %in) {
; CHECK-LABEL: i64_to_2xi32(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_param_0];
; CHECK-NEXT: shr.u64 %rd2, %rd1, 32;
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2;
; CHECK-NEXT: ret;
%low = trunc i64 %in to i32
%high64 = lshr i64 %in, 32
%high = trunc i64 %high64 to i32
%s1 = insertvalue %struct.S32 poison, i32 %low, 0
%s = insertvalue %struct.S32 %s1, i32 %high, 1
ret %struct.S32 %s
}
; Negative variant of i64_to_2xi32: the second field is taken from a logical
; shift by 31 instead of 32, so the two fields are not the exact halves of the
; input. The assertions expect the shift amount 31 to be preserved in the
; emitted shr.u64.
; NOTE(review): the in-body remark mentions a "conversion to mov", but the
; current assertions for the shift-by-32 case also show shr.u64 rather than a
; mov, so that remark may describe an older lowering -- confirm.
define %struct.S32 @i64_to_2xi32_not(i64 noundef %in) {
; CHECK-LABEL: i64_to_2xi32_not(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_not_param_0];
; CHECK-NEXT: shr.u64 %rd2, %rd1, 31;
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2;
; CHECK-NEXT: ret;
%low = trunc i64 %in to i32
; Shift by any value other than 32 blocks the conversion to mov.
%high64 = lshr i64 %in, 31
%high = trunc i64 %high64 to i32
%s1 = insertvalue %struct.S32 poison, i32 %low, 0
%s = insertvalue %struct.S32 %s1, i32 %high, 1
ret %struct.S32 %s
}
; Make sure we do not get confused when our input itself is [al]shr.
; Here the value being split is not the parameter but (%i ashr 16): field 0 is
; trunc of that value and field 1 is trunc of a further ashr by 16.
; The parameter is first passed to the external @escape_int. The assertions
; then expect two separate shifts (shr.s32 by 16, then shr.u32 by 16 on that
; result), with the high field stored at offset 2 before the low field at
; offset 0.
define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
; CHECK-LABEL: i32_to_2xi16_shr(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_shr_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b32 param0;
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni escape_int, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: shr.s32 %r2, %r1, 16;
; CHECK-NEXT: shr.u32 %r3, %r2, 16;
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3;
; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
; CHECK-NEXT: ret;
call void @escape_int(i32 %i); // Force %i to be loaded completely.
%i1 = ashr i32 %i, 16
%l = trunc i32 %i1 to i16
%h32 = ashr i32 %i1, 16
%h = trunc i32 %h32 to i16
%s0 = insertvalue %struct.S16 poison, i16 %l, 0
%s1 = insertvalue %struct.S16 %s0, i16 %h, 1
ret %struct.S16 %s1
}
; External function (declaration only, no body in this file) taking one
; noundef i32 and returning void. Called by i32_to_2xi16_shr so that the full
; 32-bit parameter has a use besides the shifts.
declare dso_local void @escape_int(i32 noundef)
|