aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
blob: ec0693a541e4400d31cccc6b0adacafed41abafc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
;
; Three llc configurations are exercised. The +sve streaming-compatible run
; and the +sme streaming run share the default FileCheck prefix, so both must
; produce the same output. The last run passes no -mattr and is verified
; under the NONEON-NOSVE prefix.

target triple = "aarch64-unknown-linux-gnu"

; Lane-wise select between two <2 x half> vectors, driven by a <2 x i1> mask
; passed as an argument. The SVE/SME expectations convert the mask to a
; predicate with cmpne and perform one `sel`; the expectations for the run
; without SVE go through the stack and issue one `fcsel` per lane.
define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) {
; CHECK-LABEL: select_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT:    mov z3.s, z2.s[1]
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h
; CHECK-NEXT:    lsl z2.h, z2.h, #15
; CHECK-NEXT:    asr z2.h, z2.h, #15
; CHECK-NEXT:    and z2.h, z2.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v2f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT:    str d0, [sp]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    str w10, [sp, #28]
; NONEON-NOSVE-NEXT:    tst w9, #0xffff
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp]
; NONEON-NOSVE-NEXT:    tst w8, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x half> %op1, <2 x half> %op2
  ret <2 x half> %sel
}

; Lane-wise select between two <4 x half> vectors using a <4 x i1> argument
; mask. In the SVE/SME expectations the mask is used at halfword granularity
; directly (no unpack step) before cmpne + `sel`; the run without SVE
; scalarizes through the stack with four `fcsel` instructions.
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) {
; CHECK-LABEL: select_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    lsl z2.h, z2.h, #15
; CHECK-NEXT:    asr z2.h, z2.h, #15
; CHECK-NEXT:    and z2.h, z2.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #22]
; NONEON-NOSVE-NEXT:    str d0, [sp]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #18]
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT:    tst w9, #0xffff
; NONEON-NOSVE-NEXT:    sbfx w9, w11, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT:    tst w9, #0xffff
; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    tst w9, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp]
; NONEON-NOSVE-NEXT:    tst w8, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
  ret <4 x half> %sel
}

; Lane-wise select between two <8 x half> vectors using a <8 x i1> argument
; mask. The SVE/SME expectations widen the mask from bytes to halfwords with
; uunpklo before building the predicate for `sel`; the run without SVE reads
; each mask byte from the stack and issues eight `fcsel` instructions.
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) {
; CHECK-LABEL: select_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    uunpklo z2.h, z2.b
; CHECK-NEXT:    lsl z2.h, z2.h, #15
; CHECK-NEXT:    asr z2.h, z2.h, #15
; CHECK-NEXT:    and z2.h, z2.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #64
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    str d2, [sp, #40]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #47]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #46]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #45]
; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #44]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #43]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #42]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #40]
; NONEON-NOSVE-NEXT:    tst w13, #0xffff
; NONEON-NOSVE-NEXT:    sbfx w13, w15, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
; NONEON-NOSVE-NEXT:    tst w13, #0xffff
; NONEON-NOSVE-NEXT:    sbfx w13, w14, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT:    tst w13, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT:    tst w12, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT:    tst w11, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT:    tst w10, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    tst w9, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr h1, [sp]
; NONEON-NOSVE-NEXT:    tst w8, #0xffff
; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
  ret <8 x half> %sel
}

; <16 x half> select where the mask is computed in-function: both operands
; are loaded from memory, compared with `fcmp oeq`, and the selected vector is
; stored back through %a. The SVE/SME expectations process the data as two
; 128-bit halves (ldp/stp of q registers, `ptrue p0.h, vl8`) with fcmeq
; producing the predicates. The expectations for the run without SVE convert
; each half lane to single precision (fcvt) for the fcmp, then `fcsel`.
define void @select_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: select_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    fcmeq p0.h, p0/z, z2.h, z3.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    sel z1.h, p0, z2.h, z3.h
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q3, [x1]
; NONEON-NOSVE-NEXT:    ldp q1, q2, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr h4, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr h5, [sp, #4]
; NONEON-NOSVE-NEXT:    ldr h16, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s2, h0
; NONEON-NOSVE-NEXT:    fcvt s3, h1
; NONEON-NOSVE-NEXT:    ldr h17, [sp, #6]
; NONEON-NOSVE-NEXT:    fcvt s6, h4
; NONEON-NOSVE-NEXT:    fcvt s7, h5
; NONEON-NOSVE-NEXT:    ldr h19, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s18, h17
; NONEON-NOSVE-NEXT:    ldr h21, [sp, #10]
; NONEON-NOSVE-NEXT:    ldr h22, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s20, h19
; NONEON-NOSVE-NEXT:    ldr h24, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr h25, [sp, #34]
; NONEON-NOSVE-NEXT:    fcmp s3, s2
; NONEON-NOSVE-NEXT:    fcvt s2, h16
; NONEON-NOSVE-NEXT:    ldr h3, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr h26, [sp, #36]
; NONEON-NOSVE-NEXT:    ldr h27, [sp, #38]
; NONEON-NOSVE-NEXT:    ldr h28, [sp, #42]
; NONEON-NOSVE-NEXT:    ldr h29, [sp, #44]
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, eq
; NONEON-NOSVE-NEXT:    fcmp s7, s6
; NONEON-NOSVE-NEXT:    fcvt s7, h3
; NONEON-NOSVE-NEXT:    ldr h6, [sp, #26]
; NONEON-NOSVE-NEXT:    fcsel s1, s5, s4, eq
; NONEON-NOSVE-NEXT:    fcmp s18, s2
; NONEON-NOSVE-NEXT:    fcvt s4, h6
; NONEON-NOSVE-NEXT:    fcvt s18, h21
; NONEON-NOSVE-NEXT:    ldr h5, [sp, #28]
; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
; NONEON-NOSVE-NEXT:    fcsel s2, s17, s16, eq
; NONEON-NOSVE-NEXT:    fcmp s20, s7
; NONEON-NOSVE-NEXT:    fcvt s16, h5
; NONEON-NOSVE-NEXT:    fcvt s17, h22
; NONEON-NOSVE-NEXT:    ldr h7, [sp, #30]
; NONEON-NOSVE-NEXT:    ldr h20, [sp, #14]
; NONEON-NOSVE-NEXT:    str h1, [sp, #68]
; NONEON-NOSVE-NEXT:    fcsel s3, s19, s3, eq
; NONEON-NOSVE-NEXT:    fcmp s18, s4
; NONEON-NOSVE-NEXT:    fcvt s19, h7
; NONEON-NOSVE-NEXT:    fcvt s23, h20
; NONEON-NOSVE-NEXT:    ldr h18, [sp, #48]
; NONEON-NOSVE-NEXT:    str h2, [sp, #70]
; NONEON-NOSVE-NEXT:    fcsel s4, s21, s6, eq
; NONEON-NOSVE-NEXT:    fcmp s17, s16
; NONEON-NOSVE-NEXT:    fcvt s17, h18
; NONEON-NOSVE-NEXT:    fcvt s21, h24
; NONEON-NOSVE-NEXT:    ldr h16, [sp, #50]
; NONEON-NOSVE-NEXT:    str h3, [sp, #72]
; NONEON-NOSVE-NEXT:    fcsel s5, s22, s5, eq
; NONEON-NOSVE-NEXT:    fcmp s23, s19
; NONEON-NOSVE-NEXT:    fcvt s22, h16
; NONEON-NOSVE-NEXT:    fcvt s23, h25
; NONEON-NOSVE-NEXT:    ldr h19, [sp, #52]
; NONEON-NOSVE-NEXT:    str h4, [sp, #74]
; NONEON-NOSVE-NEXT:    fcsel s6, s20, s7, eq
; NONEON-NOSVE-NEXT:    fcmp s21, s17
; NONEON-NOSVE-NEXT:    fcvt s20, h19
; NONEON-NOSVE-NEXT:    fcvt s21, h26
; NONEON-NOSVE-NEXT:    ldr h17, [sp, #54]
; NONEON-NOSVE-NEXT:    str h5, [sp, #76]
; NONEON-NOSVE-NEXT:    fcsel s7, s24, s18, eq
; NONEON-NOSVE-NEXT:    fcmp s23, s22
; NONEON-NOSVE-NEXT:    fcvt s22, h17
; NONEON-NOSVE-NEXT:    fcvt s23, h27
; NONEON-NOSVE-NEXT:    ldr h18, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr h24, [sp, #40]
; NONEON-NOSVE-NEXT:    str h6, [sp, #78]
; NONEON-NOSVE-NEXT:    fcsel s16, s25, s16, eq
; NONEON-NOSVE-NEXT:    fcmp s21, s20
; NONEON-NOSVE-NEXT:    fcvt s21, h18
; NONEON-NOSVE-NEXT:    fcvt s25, h24
; NONEON-NOSVE-NEXT:    ldr h20, [sp, #58]
; NONEON-NOSVE-NEXT:    str h7, [sp, #80]
; NONEON-NOSVE-NEXT:    fcsel s19, s26, s19, eq
; NONEON-NOSVE-NEXT:    fcmp s23, s22
; NONEON-NOSVE-NEXT:    fcvt s23, h20
; NONEON-NOSVE-NEXT:    fcvt s26, h28
; NONEON-NOSVE-NEXT:    ldr h22, [sp, #60]
; NONEON-NOSVE-NEXT:    str h16, [sp, #82]
; NONEON-NOSVE-NEXT:    fcsel s17, s27, s17, eq
; NONEON-NOSVE-NEXT:    fcmp s25, s21
; NONEON-NOSVE-NEXT:    fcvt s25, h22
; NONEON-NOSVE-NEXT:    fcvt s27, h29
; NONEON-NOSVE-NEXT:    ldr h21, [sp, #62]
; NONEON-NOSVE-NEXT:    str h19, [sp, #84]
; NONEON-NOSVE-NEXT:    fcsel s18, s24, s18, eq
; NONEON-NOSVE-NEXT:    ldr h24, [sp, #46]
; NONEON-NOSVE-NEXT:    fcmp s26, s23
; NONEON-NOSVE-NEXT:    fcvt s23, h21
; NONEON-NOSVE-NEXT:    str h17, [sp, #86]
; NONEON-NOSVE-NEXT:    fcvt s26, h24
; NONEON-NOSVE-NEXT:    fcsel s20, s28, s20, eq
; NONEON-NOSVE-NEXT:    fcmp s27, s25
; NONEON-NOSVE-NEXT:    ldr h25, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h27, [sp]
; NONEON-NOSVE-NEXT:    str h18, [sp, #88]
; NONEON-NOSVE-NEXT:    fcvt s17, h25
; NONEON-NOSVE-NEXT:    fcvt s18, h27
; NONEON-NOSVE-NEXT:    fcsel s7, s29, s22, eq
; NONEON-NOSVE-NEXT:    fcmp s26, s23
; NONEON-NOSVE-NEXT:    str h20, [sp, #90]
; NONEON-NOSVE-NEXT:    fcsel s16, s24, s21, eq
; NONEON-NOSVE-NEXT:    str h7, [sp, #92]
; NONEON-NOSVE-NEXT:    fcmp s18, s17
; NONEON-NOSVE-NEXT:    str h16, [sp, #94]
; NONEON-NOSVE-NEXT:    fcsel s2, s27, s25, eq
; NONEON-NOSVE-NEXT:    str h2, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  ; The select mask is the ordered-equal comparison of the two loaded vectors.
  %mask = fcmp oeq <16 x half> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x half> %op1, <16 x half> %op2
  store <16 x half> %sel, ptr %a
  ret void
}

; Lane-wise select between two <2 x float> vectors using a <2 x i1> argument
; mask. The SVE/SME expectations build a word-granule predicate with cmpne
; and use one `sel`; the run without SVE spills to the stack and performs two
; scalar `fcsel` operations.
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) {
; CHECK-LABEL: select_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    lsl z2.s, z2.s, #31
; CHECK-NEXT:    asr z2.s, z2.s, #31
; CHECK-NEXT:    and z2.s, z2.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp d2, d0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp]
; NONEON-NOSVE-NEXT:    str d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    cmp w8, #0
; NONEON-NOSVE-NEXT:    sbfx w8, w9, #0, #1
; NONEON-NOSVE-NEXT:    fcsel s3, s2, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT:    cmp w8, #0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
  ret <2 x float> %sel
}

; Lane-wise select between two <4 x float> vectors using a <4 x i1> argument
; mask. The SVE/SME expectations widen the mask from halfwords to words with
; uunpklo before cmpne + `sel`; the run without SVE reads each mask halfword
; from the stack and performs four scalar `fcsel` operations.
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) {
; CHECK-LABEL: select_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    uunpklo z2.s, z2.h
; CHECK-NEXT:    lsl z2.s, z2.s, #31
; CHECK-NEXT:    asr z2.s, z2.s, #31
; CHECK-NEXT:    and z2.s, z2.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #64
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    str d2, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #12]
; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w9, #0
; NONEON-NOSVE-NEXT:    sbfx w9, w11, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    fcsel s3, s2, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
; NONEON-NOSVE-NEXT:    cmp w9, #0
; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #16]
; NONEON-NOSVE-NEXT:    cmp w9, #0
; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
; NONEON-NOSVE-NEXT:    fcsel s3, s2, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
; NONEON-NOSVE-NEXT:    cmp w8, #0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
  ret <4 x float> %sel
}

; <8 x float> select where the mask is computed in-function: operands are
; loaded from %a and %b, compared with `fcmp oeq`, and the result is stored
; back through %a. The SVE/SME expectations handle the vector as two 128-bit
; halves (`ptrue p0.s, vl4`, fcmeq, then mov/sel). The expectations for the
; run without SVE use a scalar fcmp + `fcsel` pair per lane via the stack.
define void @select_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: select_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z2.s, z3.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    sel z1.s, p0, z2.s, z3.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q3, [x1]
; NONEON-NOSVE-NEXT:    ldp q1, q2, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp s0, s2, [sp, #20]
; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #4]
; NONEON-NOSVE-NEXT:    ldr s4, [sp, #12]
; NONEON-NOSVE-NEXT:    ldr s17, [sp]
; NONEON-NOSVE-NEXT:    ldp s6, s7, [sp, #36]
; NONEON-NOSVE-NEXT:    fcmp s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, eq
; NONEON-NOSVE-NEXT:    fcmp s3, s2
; NONEON-NOSVE-NEXT:    ldp s1, s5, [sp, #28]
; NONEON-NOSVE-NEXT:    fcsel s2, s3, s2, eq
; NONEON-NOSVE-NEXT:    ldp s16, s3, [sp, #44]
; NONEON-NOSVE-NEXT:    fcmp s4, s1
; NONEON-NOSVE-NEXT:    fcsel s1, s4, s1, eq
; NONEON-NOSVE-NEXT:    fcmp s5, s3
; NONEON-NOSVE-NEXT:    ldr s4, [sp, #52]
; NONEON-NOSVE-NEXT:    fcsel s3, s5, s3, eq
; NONEON-NOSVE-NEXT:    fcmp s6, s4
; NONEON-NOSVE-NEXT:    ldr s5, [sp, #56]
; NONEON-NOSVE-NEXT:    stp s2, s1, [sp, #72]
; NONEON-NOSVE-NEXT:    fcsel s4, s6, s4, eq
; NONEON-NOSVE-NEXT:    fcmp s7, s5
; NONEON-NOSVE-NEXT:    ldr s6, [sp, #60]
; NONEON-NOSVE-NEXT:    fcsel s5, s7, s5, eq
; NONEON-NOSVE-NEXT:    fcmp s16, s6
; NONEON-NOSVE-NEXT:    ldr s7, [sp, #16]
; NONEON-NOSVE-NEXT:    stp s3, s4, [sp, #80]
; NONEON-NOSVE-NEXT:    fcsel s6, s16, s6, eq
; NONEON-NOSVE-NEXT:    fcmp s17, s7
; NONEON-NOSVE-NEXT:    fcsel s3, s17, s7, eq
; NONEON-NOSVE-NEXT:    stp s5, s6, [sp, #88]
; NONEON-NOSVE-NEXT:    stp s3, s0, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  ; The select mask is the ordered-equal comparison of the two loaded vectors.
  %mask = fcmp oeq <8 x float> %op1, %op2
  %sel = select <8 x i1> %mask, <8 x float> %op1, <8 x float> %op2
  store <8 x float> %sel, ptr %a
  ret void
}

; Single-element case: <1 x double> select with a <1 x i1> argument mask.
; Every configuration is expected to test bit 0 of w0 and use one scalar
; `fcsel`; no predicate or vector select is expected. The expectations for the
; run without SVE additionally store the result to the stack and reload it.
define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> %mask) {
; CHECK-LABEL: select_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    fcsel d0, d0, d1, ne
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v1f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    tst w0, #0x1
; NONEON-NOSVE-NEXT:    fcsel d0, d0, d1, ne
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
  ret <1 x double> %sel
}

; Lane-wise select between two <2 x double> vectors using a <2 x i1> argument
; mask. The SVE/SME expectations widen the mask from words to doublewords
; with uunpklo before cmpne + `sel`; the run without SVE spills to the stack
; and performs two scalar `fcsel` operations on d registers.
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) {
; CHECK-LABEL: select_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    uunpklo z2.d, z2.s
; CHECK-NEXT:    lsl z2.d, z2.d, #63
; CHECK-NEXT:    asr z2.d, z2.d, #63
; CHECK-NEXT:    and z2.d, z2.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #64
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    str d2, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
; NONEON-NOSVE-NEXT:    cmp x8, #0
; NONEON-NOSVE-NEXT:    sbfx x8, x9, #0, #1
; NONEON-NOSVE-NEXT:    fcsel d3, d2, d0, ne
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
; NONEON-NOSVE-NEXT:    cmp x8, #0
; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
  ret <2 x double> %sel
}

; <4 x double> select where the mask is computed in-function: operands are
; loaded from %a and %b, compared with `fcmp oeq`, and the result is stored
; back through %a. The SVE/SME expectations handle the vector as two 128-bit
; halves (`ptrue p0.d, vl2`, fcmeq, then mov/sel). The expectations for the
; run without SVE use a scalar fcmp + `fcsel` pair per lane via the stack.
define void @select_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: select_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT:    fcmeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT:    mov z0.d, p1/m, z1.d
; CHECK-NEXT:    sel z1.d, p0, z2.d, z3.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: select_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q3, [x1]
; NONEON-NOSVE-NEXT:    ldp q1, q2, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-96]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d5, d1, [sp]
; NONEON-NOSVE-NEXT:    ldp d0, d3, [sp, #24]
; NONEON-NOSVE-NEXT:    ldp d4, d2, [sp, #40]
; NONEON-NOSVE-NEXT:    fcmp d1, d0
; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, eq
; NONEON-NOSVE-NEXT:    fcmp d3, d2
; NONEON-NOSVE-NEXT:    ldr d1, [sp, #56]
; NONEON-NOSVE-NEXT:    fcsel d2, d3, d2, eq
; NONEON-NOSVE-NEXT:    fcmp d4, d1
; NONEON-NOSVE-NEXT:    ldr d3, [sp, #16]
; NONEON-NOSVE-NEXT:    fcsel d1, d4, d1, eq
; NONEON-NOSVE-NEXT:    fcmp d5, d3
; NONEON-NOSVE-NEXT:    fcsel d3, d5, d3, eq
; NONEON-NOSVE-NEXT:    stp d2, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    stp d3, d0, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  ; The select mask is the ordered-equal comparison of the two loaded vectors.
  %mask = fcmp oeq <4 x double> %op1, %op2
  %sel = select <4 x i1> %mask, <4 x double> %op1, <4 x double> %op2
  store <4 x double> %sel, ptr %a
  ret void
}