; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64LE
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_32
define i32 @test_Greater_than(ptr %colauths) {
; This test case covers the special case of comparison against a zero vector.
; Currently the generated code performs the comparison (vcmpequh) and then a
; negation (xxlnor); this pattern is expected to be optimized in a future patch.
; POWERPC_64LE-LABEL: test_Greater_than:
; POWERPC_64LE: # %bb.0: # %entry
; POWERPC_64LE-NEXT: lfd f0, 0(r3)
; POWERPC_64LE-NEXT: xxlxor v3, v3, v3
; POWERPC_64LE-NEXT: li r4, 0
; POWERPC_64LE-NEXT: li r3, 4
; POWERPC_64LE-NEXT: xxswapd v2, f0
; POWERPC_64LE-NEXT: vcmpequh v2, v2, v3
; POWERPC_64LE-NEXT: xxlnor v2, v2, v2
; POWERPC_64LE-NEXT: vmrglh v3, v2, v2
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
; POWERPC_64LE-NEXT: vextuwrx r3, r3, v3
; POWERPC_64LE-NEXT: clrlwi r4, r4, 31
; POWERPC_64LE-NEXT: rlwimi r4, r3, 1, 30, 30
; POWERPC_64LE-NEXT: mfvsrwz r3, v3
; POWERPC_64LE-NEXT: rlwimi r4, r3, 2, 29, 29
; POWERPC_64LE-NEXT: li r3, 12
; POWERPC_64LE-NEXT: vextuwrx r3, r3, v3
; POWERPC_64LE-NEXT: rlwimi r4, r3, 3, 28, 28
; POWERPC_64LE-NEXT: stb r4, -1(r1)
; POWERPC_64LE-NEXT: lbz r3, -1(r1)
; POWERPC_64LE-NEXT: popcntd r3, r3
; POWERPC_64LE-NEXT: blr
;
; POWERPC_64-LABEL: test_Greater_than:
; POWERPC_64: # %bb.0: # %entry
; POWERPC_64-NEXT: lxsd v2, 0(r3)
; POWERPC_64-NEXT: xxlxor v3, v3, v3
; POWERPC_64-NEXT: li r4, 12
; POWERPC_64-NEXT: li r3, 8
; POWERPC_64-NEXT: vcmpequh v2, v2, v3
; POWERPC_64-NEXT: xxlnor v2, v2, v2
; POWERPC_64-NEXT: vmrghh v2, v2, v2
; POWERPC_64-NEXT: vextuwlx r4, r4, v2
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
; POWERPC_64-NEXT: clrlwi r4, r4, 31
; POWERPC_64-NEXT: rlwimi r4, r3, 1, 30, 30
; POWERPC_64-NEXT: mfvsrwz r3, v2
; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29
; POWERPC_64-NEXT: li r3, 0
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
; POWERPC_64-NEXT: rlwimi r4, r3, 3, 28, 28
; POWERPC_64-NEXT: stb r4, -1(r1)
; POWERPC_64-NEXT: lbz r3, -1(r1)
; POWERPC_64-NEXT: popcntd r3, r3
; POWERPC_64-NEXT: blr
;
; POWERPC_32-LABEL: test_Greater_than:
; POWERPC_32: # %bb.0: # %entry
; POWERPC_32-NEXT: li r4, 4
; POWERPC_32-NEXT: lxvwsx vs1, 0, r3
; POWERPC_32-NEXT: xxlxor v3, v3, v3
; POWERPC_32-NEXT: lxvwsx vs0, r3, r4
; POWERPC_32-NEXT: xxmrghw v2, vs1, vs0
; POWERPC_32-NEXT: vcmpequh v2, v2, v3
; POWERPC_32-NEXT: xxlnor v2, v2, v2
; POWERPC_32-NEXT: vmrghh v2, v2, v2
; POWERPC_32-NEXT: stxv v2, -32(r1)
; POWERPC_32-NEXT: lwz r3, -20(r1)
; POWERPC_32-NEXT: lwz r4, -24(r1)
; POWERPC_32-NEXT: clrlwi r3, r3, 31
; POWERPC_32-NEXT: rlwimi r3, r4, 1, 30, 30
; POWERPC_32-NEXT: lwz r4, -28(r1)
; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29
; POWERPC_32-NEXT: lwz r4, -32(r1)
; POWERPC_32-NEXT: rlwimi r3, r4, 3, 28, 28
; POWERPC_32-NEXT: popcntw r3, r3
; POWERPC_32-NEXT: blr
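; The IR below loads four i16 values, compares each element against zero
; (icmp ne), bitcasts the resulting <4 x i1> mask to an i4, and counts the
; set bits with llvm.ctpop.i4.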
entry:
%0 = load <4 x i16>, ptr %colauths, align 2, !tbaa !5
%1 = icmp ne <4 x i16> %0, zeroinitializer
%2 = bitcast <4 x i1> %1 to i4
%3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2)
%4 = zext nneg i4 %3 to i32
ret i32 %4
}
declare i4 @llvm.ctpop.i4(i4)
!5 = !{!6, !6, i64 0}
!6 = !{!"short", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
; Function to lock down codegen changes for floating-point vector comparisons.
define range(i32 0, 5) i32 @cols_needed(ptr %colauths) {
; POWERPC_64LE-LABEL: cols_needed:
; POWERPC_64LE: # %bb.0: # %entry
; POWERPC_64LE-NEXT: lxv vs0, 0(r3)
; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1
; POWERPC_64LE-NEXT: li r4, 4
; POWERPC_64LE-NEXT: li r3, 0
; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1
; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2
; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30
; POWERPC_64LE-NEXT: sub r3, r4, r3
; POWERPC_64LE-NEXT: mfvsrwz r4, v2
; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29
; POWERPC_64LE-NEXT: or r3, r3, r4
; POWERPC_64LE-NEXT: li r4, 12
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
; POWERPC_64LE-NEXT: slwi r4, r4, 3
; POWERPC_64LE-NEXT: or r3, r3, r4
; POWERPC_64LE-NEXT: clrlwi r3, r3, 28
; POWERPC_64LE-NEXT: stb r3, -1(r1)
; POWERPC_64LE-NEXT: lbz r3, -1(r1)
; POWERPC_64LE-NEXT: popcntd r3, r3
; POWERPC_64LE-NEXT: blr
;
; POWERPC_64-LABEL: cols_needed:
; POWERPC_64: # %bb.0: # %entry
; POWERPC_64-NEXT: lxv vs0, 0(r3)
; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1
; POWERPC_64-NEXT: li r4, 8
; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1
; POWERPC_64-NEXT: xxlnor v2, vs0, vs0
; POWERPC_64-NEXT: vextuwlx r4, r4, v2
; POWERPC_64-NEXT: mfvsrwz r3, v2
; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30
; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29
; POWERPC_64-NEXT: li r3, 0
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28
; POWERPC_64-NEXT: li r3, 12
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
; POWERPC_64-NEXT: sub r3, r4, r3
; POWERPC_64-NEXT: clrlwi r3, r3, 28
; POWERPC_64-NEXT: stb r3, -1(r1)
; POWERPC_64-NEXT: lbz r3, -1(r1)
; POWERPC_64-NEXT: popcntd r3, r3
; POWERPC_64-NEXT: blr
;
; POWERPC_32-LABEL: cols_needed:
; POWERPC_32: # %bb.0: # %entry
; POWERPC_32-NEXT: lxv vs0, 0(r3)
; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1
; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1
; POWERPC_32-NEXT: xxlnor vs0, vs0, vs0
; POWERPC_32-NEXT: stxv vs0, -32(r1)
; POWERPC_32-NEXT: lwz r3, -24(r1)
; POWERPC_32-NEXT: lwz r4, -28(r1)
; POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30
; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29
; POWERPC_32-NEXT: lwz r4, -32(r1)
; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28
; POWERPC_32-NEXT: lwz r4, -20(r1)
; POWERPC_32-NEXT: sub r3, r3, r4
; POWERPC_32-NEXT: clrlwi r3, r3, 28
; POWERPC_32-NEXT: popcntw r3, r3
; POWERPC_32-NEXT: blr
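; The IR below performs an unordered not-equal compare (fcmp une) of four
; floats against zero, bitcasts the <4 x i1> mask to an i4, and counts the
; set bits with llvm.ctpop.i4.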
entry:
%0 = load <4 x float>, ptr %colauths, align 4, !tbaa !5
%1 = fcmp une <4 x float> %0, zeroinitializer
%2 = bitcast <4 x i1> %1 to i4
%3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2)
%4 = zext nneg i4 %3 to i32
ret i32 %4
}