1#![allow(non_camel_case_types)]
4
5#[rustfmt::skip]
6mod generated;
7#[rustfmt::skip]
8#[stable(feature = "neon_intrinsics", since = "1.59.0")]
9pub use self::generated::*;
10
11use crate::{
14 core_arch::{arm_shared::*, simd::*},
15 intrinsics::{simd::*, *},
16 mem::transmute,
17};
18#[cfg(test)]
19use stdarch_test::assert_instr;
20
types! {
    #![stable(feature = "neon_intrinsics", since = "1.59.0")]

    // Vector holding a single `f64` lane (64 bits of payload).
    pub struct float64x1_t(1 x f64);
    // Vector holding two `f64` lanes (128 bits of payload).
    pub struct float64x2_t(2 x f64);
}
29
/// Two [`float64x1_t`] vectors bundled into one `#[repr(C)]` value.
///
/// The tests in this file use it as the operand type of the multi-vector
/// load/store intrinsics (`vld1_f64_x2`, `vst2_f64`, ...).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
/// Three [`float64x1_t`] vectors bundled into one `#[repr(C)]` value.
///
/// The tests in this file use it as the operand type of the multi-vector
/// load/store intrinsics (`vld1_f64_x3`, `vst3_f64`, ...).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
/// Four [`float64x1_t`] vectors bundled into one `#[repr(C)]` value.
///
/// The tests in this file use it as the operand type of the multi-vector
/// load/store intrinsics (`vld1_f64_x4`, `vst4_f64`, ...).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x4_t(
    pub float64x1_t,
    pub float64x1_t,
    pub float64x1_t,
    pub float64x1_t,
);
50
/// Two [`float64x2_t`] vectors bundled into one `#[repr(C)]` value.
///
/// The tests in this file use it as the operand type of the multi-vector
/// load/store intrinsics (`vld1q_f64_x2`, `vst2q_f64`, ...).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
/// Three [`float64x2_t`] vectors bundled into one `#[repr(C)]` value.
///
/// The tests in this file use it as the operand type of the multi-vector
/// load/store intrinsics (`vld1q_f64_x3`, `vst3q_f64`, ...).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
/// Four [`float64x2_t`] vectors bundled into one `#[repr(C)]` value.
///
/// The tests in this file use it as the operand type of the multi-vector
/// load/store intrinsics (`vld1q_f64_x4`, `vst4q_f64`, ...).
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x4_t(
    pub float64x2_t,
    pub float64x2_t,
    pub float64x2_t,
    pub float64x2_t,
);
71
// Per-lane "shift right and insert" on vectors of `$width` lanes of type `$ty`.
//
// For every lane, `$b` is shifted right by `$N` bits and the result replaces
// the low `BITS - $N` bits of the matching lane of `$a`; the top `$N` bits of
// the `$a` lane are preserved.
//
// Arguments: `$ty` lane type, `$width` lane count, `$N` shift amount,
// `$a` destination vector, `$b` vector to shift in.
//
// The expansion calls `transmute` and the `simd_*` intrinsics, so it
// presumably has to be expanded inside an `unsafe` context.
//
// NOTE(review): the mask (`MAX >> N`) and the shift only give the described
// result when `$ty` is an unsigned integer type — confirm at the call sites.
macro_rules! shift_right_and_insert {
    ($ty:ty, $width:literal, $N:expr, $a:expr, $b:expr) => {{
        type V = Simd<$ty, $width>;

        // Shifting by the full lane width is not a valid Rust shift; in that
        // case nothing of `$b` survives, so `$a` is returned untouched.
        if $N as u32 == <$ty>::BITS {
            $a
        } else {
            let a: V = transmute($a);
            let b: V = transmute($b);

            // `mask` covers the bits that will be overwritten by shifted `$b`;
            // its complement selects the bits of `$a` that are kept.
            let mask = <$ty>::MAX >> $N;
            let kept: V = simd_and(a, V::splat(!mask));

            let shift_counts = V::splat($N as $ty);
            let shifted = simd_shr(b, shift_counts);

            transmute(simd_or(kept, shifted))
        }
    }};
}
93
94pub(crate) use shift_right_and_insert;
95
/// Loads one `f64` from `ptr` into the single lane of a [`float64x1_t`].
///
/// With only one lane there is nothing to duplicate, so this simply forwards
/// to `vld1_f64`.
///
/// # Safety
///
/// `ptr` is handed straight to `vld1_f64` and must satisfy that intrinsic's
/// requirements (presumably: valid for reading one `f64`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
    vld1_f64(ptr)
}
104
105#[inline]
107#[target_feature(enable = "neon")]
108#[cfg_attr(test, assert_instr(ld1r))]
109#[stable(feature = "neon_intrinsics", since = "1.59.0")]
110pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
111 let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
112 simd_shuffle!(x, x, [0, 0])
113}
114
115#[inline]
117#[target_feature(enable = "neon")]
118#[rustc_legacy_const_generics(2)]
119#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
120#[stable(feature = "neon_intrinsics", since = "1.59.0")]
121pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
122 static_assert!(LANE == 0);
123 simd_insert!(src, LANE as u32, *ptr)
124}
125
126#[inline]
128#[target_feature(enable = "neon")]
129#[rustc_legacy_const_generics(2)]
130#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
131#[stable(feature = "neon_intrinsics", since = "1.59.0")]
132pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
133 static_assert_uimm_bits!(LANE, 1);
134 simd_insert!(src, LANE as u32, *ptr)
135}
136
137#[inline]
141#[target_feature(enable = "neon")]
142#[cfg_attr(test, assert_instr(bsl))]
143#[stable(feature = "neon_intrinsics", since = "1.59.0")]
144pub fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
145 let not = int64x1_t::splat(-1);
146 unsafe {
147 transmute(simd_or(
148 simd_and(a, transmute(b)),
149 simd_and(simd_xor(a, transmute(not)), transmute(c)),
150 ))
151 }
152}
153#[inline]
155#[target_feature(enable = "neon")]
156#[cfg_attr(test, assert_instr(bsl))]
157#[stable(feature = "neon_intrinsics", since = "1.59.0")]
158pub fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
159 let not = int64x1_t::splat(-1);
160 unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
161}
162#[inline]
164#[target_feature(enable = "neon")]
165#[cfg_attr(test, assert_instr(bsl))]
166#[stable(feature = "neon_intrinsics", since = "1.59.0")]
167pub fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
168 let not = int64x2_t::splat(-1);
169 unsafe {
170 transmute(simd_or(
171 simd_and(a, transmute(b)),
172 simd_and(simd_xor(a, transmute(not)), transmute(c)),
173 ))
174 }
175}
176#[inline]
178#[target_feature(enable = "neon")]
179#[cfg_attr(test, assert_instr(bsl))]
180#[stable(feature = "neon_intrinsics", since = "1.59.0")]
181pub fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
182 let not = int64x2_t::splat(-1);
183 unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
184}
185
/// Adds the single `f64` lanes of `a` and `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
    // SAFETY: lane-wise add of two vectors of the same type; no memory is accessed.
    unsafe { simd_add(a, b) }
}
194
/// Adds `a` and `b` lane by lane (two `f64` lanes).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    // SAFETY: lane-wise add of two vectors of the same type; no memory is accessed.
    unsafe { simd_add(a, b) }
}
203
/// Adds the single signed 64-bit lanes of `a` and `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    // SAFETY: lane-wise add of two vectors of the same type; no memory is accessed.
    unsafe { simd_add(a, b) }
}
212
/// Adds the single unsigned 64-bit lanes of `a` and `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    // SAFETY: lane-wise add of two vectors of the same type; no memory is accessed.
    unsafe { simd_add(a, b) }
}
221
222#[inline]
224#[target_feature(enable = "neon")]
225#[cfg_attr(test, assert_instr(add))]
226#[stable(feature = "neon_intrinsics", since = "1.59.0")]
227pub fn vaddd_s64(a: i64, b: i64) -> i64 {
228 a.wrapping_add(b)
229}
230
231#[inline]
233#[target_feature(enable = "neon")]
234#[cfg_attr(test, assert_instr(add))]
235#[stable(feature = "neon_intrinsics", since = "1.59.0")]
236pub fn vaddd_u64(a: u64, b: u64) -> u64 {
237 a.wrapping_add(b)
238}
239
/// Single-lane form of the NEON `vext` operation for `poly64x1_t`.
///
/// The only index accepted at compile time is `N == 0`, for which the result
/// is `a` itself; `_b` is never read.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
    static_assert!(N == 0);
    a
}
250
/// Single-lane form of the NEON `vext` operation for `float64x1_t`.
///
/// The only index accepted at compile time is `N == 0`, for which the result
/// is `a` itself; `_b` is never read.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
    static_assert!(N == 0);
    a
}
261
262#[inline]
264#[target_feature(enable = "neon")]
265#[cfg_attr(test, assert_instr(fmov))]
266#[stable(feature = "neon_intrinsics", since = "1.59.0")]
267pub fn vdup_n_p64(value: p64) -> poly64x1_t {
268 unsafe { transmute(u64x1::new(value)) }
269}
270
/// Builds a [`float64x1_t`] whose single lane holds `value`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdup_n_f64(value: f64) -> float64x1_t {
    float64x1_t::splat(value)
}
279
280#[inline]
282#[target_feature(enable = "neon")]
283#[cfg_attr(test, assert_instr(dup))]
284#[stable(feature = "neon_intrinsics", since = "1.59.0")]
285pub fn vdupq_n_p64(value: p64) -> poly64x2_t {
286 unsafe { transmute(u64x2::new(value, value)) }
287}
288
/// Builds a [`float64x2_t`] with `value` in both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdupq_n_f64(value: f64) -> float64x2_t {
    float64x2_t::splat(value)
}
297
/// Builds a [`poly64x1_t`] whose single lane holds `value`.
///
/// Forwards to [`vdup_n_p64`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_p64(value: p64) -> poly64x1_t {
    vdup_n_p64(value)
}
306
/// Builds a [`float64x1_t`] whose single lane holds `value`.
///
/// Forwards to [`vdup_n_f64`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_f64(value: f64) -> float64x1_t {
    vdup_n_f64(value)
}
315
/// Builds a [`poly64x2_t`] with `value` in both lanes.
///
/// Forwards to [`vdupq_n_p64`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_p64(value: p64) -> poly64x2_t {
    vdupq_n_p64(value)
}
324
/// Builds a [`float64x2_t`] with `value` in both lanes.
///
/// Forwards to [`vdupq_n_f64`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_f64(value: f64) -> float64x2_t {
    vdupq_n_f64(value)
}
333
/// Returns lane `IMM5` of `v` as a scalar `f64`.
///
/// The vector has a single lane, so `IMM5` must be 0 (checked at compile time).
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
    static_assert!(IMM5 == 0);
    // SAFETY: the static assertion above pins the index to lane 0, the only
    // lane of `v`.
    unsafe { simd_extract!(v, IMM5 as u32) }
}
347
/// Shifts the signed 64-bit scalar `a` left by `N` bits.
///
/// `N` is validated at compile time by `static_assert_uimm_bits!(N, 6)`
/// (presumably restricting it to `0..=63`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}
358
/// Shifts the unsigned 64-bit scalar `a` left by `N` bits.
///
/// `N` is validated at compile time by `static_assert_uimm_bits!(N, 6)`
/// (presumably restricting it to `0..=63`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}
369
370#[inline]
372#[target_feature(enable = "neon")]
373#[cfg_attr(test, assert_instr(nop, N = 2))]
374#[rustc_legacy_const_generics(1)]
375#[stable(feature = "neon_intrinsics", since = "1.59.0")]
376pub fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
377 static_assert!(N >= 1 && N <= 64);
378 let n: i32 = if N == 64 { 63 } else { N };
379 a >> n
380}
381
382#[inline]
384#[target_feature(enable = "neon")]
385#[cfg_attr(test, assert_instr(nop, N = 2))]
386#[rustc_legacy_const_generics(1)]
387#[stable(feature = "neon_intrinsics", since = "1.59.0")]
388pub fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
389 static_assert!(N >= 1 && N <= 64);
390 let n: i32 = if N == 64 {
391 return 0;
392 } else {
393 N
394 };
395 a >> n
396}
397
398#[inline]
400#[target_feature(enable = "neon")]
401#[cfg_attr(test, assert_instr(nop, N = 2))]
402#[rustc_legacy_const_generics(2)]
403#[stable(feature = "neon_intrinsics", since = "1.59.0")]
404pub fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
405 static_assert!(N >= 1 && N <= 64);
406 a.wrapping_add(vshrd_n_s64::<N>(b))
407}
408
409#[inline]
411#[target_feature(enable = "neon")]
412#[cfg_attr(test, assert_instr(nop, N = 2))]
413#[rustc_legacy_const_generics(2)]
414#[stable(feature = "neon_intrinsics", since = "1.59.0")]
415pub fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
416 static_assert!(N >= 1 && N <= 64);
417 a.wrapping_add(vshrd_n_u64::<N>(b))
418}
419
// Unit tests for AArch64 NEON intrinsics: the hand-written ones from this file
// plus others that are defined elsewhere and reached through the glob imports
// below (e.g. `vcopy_lane_*`, `vld1_f64`, `vst1_f64`, the `_x2/_x3/_x4` and
// lane load/store families).
#[cfg(test)]
mod tests {
    use crate::core_arch::aarch64::test_support::*;
    use crate::core_arch::arm_shared::test_support::*;
    use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
    use stdarch_test::simd_test;

    #[simd_test(enable = "neon")]
    fn test_vadd_f64() {
        let a = f64x1::from_array([1.]);
        let b = f64x1::from_array([8.]);
        let e = f64x1::from_array([9.]);
        let r = f64x1::from(vadd_f64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vaddq_f64() {
        let a = f64x2::new(1., 2.);
        let b = f64x2::new(8., 7.);
        let e = f64x2::new(9., 9.);
        let r = f64x2::from(vaddq_f64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vadd_s64() {
        let a = i64x1::from_array([1]);
        let b = i64x1::from_array([8]);
        let e = i64x1::from_array([9]);
        let r = i64x1::from(vadd_s64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vadd_u64() {
        let a = u64x1::from_array([1]);
        let b = u64x1::from_array([8]);
        let e = u64x1::from_array([9]);
        let r = u64x1::from(vadd_u64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vaddd_s64() {
        let a = 1_i64;
        let b = 8_i64;
        let e = 9_i64;
        let r: i64 = vaddd_s64(a, b);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vaddd_u64() {
        let a = 1_u64;
        let b = 8_u64;
        let e = 9_u64;
        let r: u64 = vaddd_u64(a, b);
        assert_eq!(r, e);
    }

    // With one lane and index 0, `vext` must hand back its first operand.
    #[simd_test(enable = "neon")]
    fn test_vext_p64() {
        let a = u64x1::new(0);
        let b = u64x1::new(1);
        let e = u64x1::new(0);
        let r = u64x1::from(vext_p64::<0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vext_f64() {
        let a = f64x1::new(0.);
        let b = f64x1::new(1.);
        let e = f64x1::new(0.);
        let r = f64x1::from(vext_f64::<0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshld_n_s64() {
        let a: i64 = 1;
        let e: i64 = 4;
        let r: i64 = vshld_n_s64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshld_n_u64() {
        let a: u64 = 1;
        let e: u64 = 4;
        let r: u64 = vshld_n_u64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshrd_n_s64() {
        let a: i64 = 4;
        let e: i64 = 1;
        let r: i64 = vshrd_n_s64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshrd_n_u64() {
        let a: u64 = 4;
        let e: u64 = 1;
        let r: u64 = vshrd_n_u64::<2>(a);
        assert_eq!(r, e);
    }

    // 1 + (4 >> 2) == 2
    #[simd_test(enable = "neon")]
    fn test_vsrad_n_s64() {
        let a: i64 = 1;
        let b: i64 = 4;
        let e: i64 = 2;
        let r: i64 = vsrad_n_s64::<2>(a, b);
        assert_eq!(r, e);
    }

    // 1 + (4 >> 2) == 2
    #[simd_test(enable = "neon")]
    fn test_vsrad_n_u64() {
        let a: u64 = 1;
        let b: u64 = 4;
        let e: u64 = 2;
        let r: u64 = vsrad_n_u64::<2>(a, b);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdup_n_f64() {
        let a: f64 = 3.3;
        let e = f64x1::new(3.3);
        let r = f64x1::from(vdup_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdup_n_p64() {
        let a: u64 = 3;
        let e = u64x1::new(3);
        let r = u64x1::from(vdup_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdupq_n_f64() {
        let a: f64 = 3.3;
        let e = f64x2::new(3.3, 3.3);
        let r = f64x2::from(vdupq_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdupq_n_p64() {
        let a: u64 = 3;
        let e = u64x2::new(3, 3);
        let r = u64x2::from(vdupq_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmov_n_p64() {
        let a: u64 = 3;
        let e = u64x1::new(3);
        let r = u64x1::from(vmov_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmov_n_f64() {
        let a: f64 = 3.3;
        let e = f64x1::new(3.3);
        let r = f64x1::from(vmov_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmovq_n_p64() {
        let a: u64 = 3;
        let e = u64x2::new(3, 3);
        let r = u64x2::from(vmovq_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmovq_n_f64() {
        let a: f64 = 3.3;
        let e = f64x2::new(3.3, 3.3);
        let r = f64x2::from(vmovq_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vget_lane_f64() {
        let v = f64x1::new(1.0);
        let r = vget_lane_f64::<0>(v.into());
        assert_eq!(r, 1.0);
    }

    // The `vcopy_lane_*` tests copy lane 0 of `b` over lane 0 of `a`, so the
    // expected value is always `b`.
    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_s64() {
        let a = i64x1::new(1);
        let b = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let r = i64x1::from(vcopy_lane_s64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_u64() {
        let a = u64x1::new(1);
        let b = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
        let r = u64x1::from(vcopy_lane_u64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_p64() {
        let a = u64x1::new(1);
        let b = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let r = u64x1::from(vcopy_lane_p64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_f64() {
        let a = f64x1::from_array([1.]);
        let b = f64x1::from_array([0.]);
        let e = f64x1::from_array([0.]);
        let r = f64x1::from(vcopy_lane_f64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    // The mask selects only the sign bit from `b`; everything else comes from
    // `c`, giving `c` with `b`'s (negative) sign.
    #[simd_test(enable = "neon")]
    fn test_vbsl_f64() {
        let a = u64x1::new(0x8000000000000000);
        let b = f64x1::new(-1.23f64);
        let c = f64x1::new(2.34f64);
        let e = f64x1::new(-2.34f64);
        let r = f64x1::from(vbsl_f64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    // Mask bit 0 picks bit 0 of `b` (all ones); the rest comes from `c` (zero).
    #[simd_test(enable = "neon")]
    fn test_vbsl_p64() {
        let a = u64x1::new(1);
        let b = u64x1::new(u64::MAX);
        let c = u64x1::new(u64::MIN);
        let e = u64x1::new(1);
        let r = u64x1::from(vbsl_p64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vbslq_f64() {
        let a = u64x2::new(1, 0x8000000000000000);
        let b = f64x2::new(f64::MAX, -1.23f64);
        let c = f64x2::new(f64::MIN, 2.34f64);
        let e = f64x2::new(f64::MIN, -2.34f64);
        let r = f64x2::from(vbslq_f64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vbslq_p64() {
        let a = u64x2::new(u64::MAX, 1);
        let b = u64x2::new(u64::MAX, u64::MAX);
        let c = u64x2::new(u64::MIN, u64::MIN);
        let e = u64x2::new(u64::MAX, 1);
        let r = u64x2::from(vbslq_p64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    // The load tests start at element 1 of the backing array so that a load
    // from the wrong address would be visible in the result.
    #[simd_test(enable = "neon")]
    fn test_vld1_f64() {
        let a: [f64; 2] = [0., 1.];
        let e = f64x1::new(1.);
        let r = unsafe { f64x1::from(vld1_f64(a[1..].as_ptr())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1q_f64() {
        let a: [f64; 3] = [0., 1., 2.];
        let e = f64x2::new(1., 2.);
        let r = unsafe { f64x2::from(vld1q_f64(a[1..].as_ptr())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1_dup_f64() {
        let a: [f64; 2] = [1., 42.];
        let e = f64x1::new(42.);
        let r = unsafe { f64x1::from(vld1_dup_f64(a[1..].as_ptr())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1q_dup_f64() {
        let elem: f64 = 42.;
        let e = f64x2::new(42., 42.);
        let r = unsafe { f64x2::from(vld1q_dup_f64(&elem)) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1_lane_f64() {
        let a = f64x1::new(0.);
        let elem: f64 = 42.;
        let e = f64x1::new(42.);
        let r = unsafe { f64x1::from(vld1_lane_f64::<0>(&elem, a.into())) };
        assert_eq!(r, e)
    }

    // Only lane 1 is replaced; lane 0 of `a` must survive.
    #[simd_test(enable = "neon")]
    fn test_vld1q_lane_f64() {
        let a = f64x2::new(0., 1.);
        let elem: f64 = 42.;
        let e = f64x2::new(0., 42.);
        let r = unsafe { f64x2::from(vld1q_lane_f64::<1>(&elem, a.into())) };
        assert_eq!(r, e)
    }

    // The store tests write from element 1 onwards and check that element 0
    // is left untouched.
    #[simd_test(enable = "neon")]
    fn test_vst1_f64() {
        let mut vals = [0_f64; 2];
        let a = f64x1::new(1.);

        unsafe {
            vst1_f64(vals[1..].as_mut_ptr(), a.into());
        }

        assert_eq!(vals[0], 0.);
        assert_eq!(vals[1], 1.);
    }

    #[simd_test(enable = "neon")]
    fn test_vst1q_f64() {
        let mut vals = [0_f64; 3];
        let a = f64x2::new(1., 2.);

        unsafe {
            vst1q_f64(vals[1..].as_mut_ptr(), a.into());
        }

        assert_eq!(vals[0], 0.);
        assert_eq!(vals[1], 1.);
        assert_eq!(vals[2], 2.);
    }

    // Body of a store/load round-trip test.
    //
    // Fills `$len` elements of `$elem_ty` with 0, 1, 2, ..., reinterprets them
    // as `$vec_ty`, writes that value with `$store` into an uninitialised
    // buffer, reads it back with `$load` and checks the elements are unchanged.
    // Must be expanded inside an `unsafe` context.
    macro_rules! wide_store_load_roundtrip {
        ($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => {
            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
            let a: $vec_ty = transmute(vals);
            let mut tmp = core::mem::MaybeUninit::<[$elem_ty; $len]>::uninit();
            $store(tmp.as_mut_ptr().cast(), a);

            // Relies on `$store` having written every element of `tmp`.
            let tmp = tmp.assume_init();

            let r: $vec_ty = $load(tmp.as_ptr().cast());
            let out: [$elem_ty; $len] = transmute(r);
            assert_eq!(out, vals);
        };
    }

    // Generates one round-trip test per `name(args...)` entry, gated on the
    // `neon` and `fp16` target features and compiled out on `arm64ec`.
    macro_rules! wide_store_load_roundtrip_fp16 {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                // Skipped under Miri (reason not recorded here — TODO confirm).
                #[cfg_attr(miri, ignore)]
                #[simd_test(enable = "neon,fp16")]
                #[cfg(not(target_arch = "arm64ec"))]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    wide_store_load_roundtrip_fp16! {
        test_vld1_f16_x2(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
        test_vld1_f16_x3(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
        test_vld1_f16_x4(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);

        test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
        test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
        test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);

        test_vld2_f16(f16, 8, float16x4x2_t, vst2_f16, vld2_f16);
        test_vld3_f16(f16, 12, float16x4x3_t, vst3_f16, vld3_f16);
        test_vld4_f16(f16, 16, float16x4x4_t, vst4_f16, vld4_f16);

        test_vld2q_f16(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16);
        test_vld3q_f16(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16);
        test_vld4q_f16(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16);
    }

    // Generates one round-trip test per entry, gated on `neon` and `aes`
    // (used below for the `p64` vector types).
    macro_rules! wide_store_load_roundtrip_aes {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon,aes")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    wide_store_load_roundtrip_aes! {
        test_vld1_p64_x2(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
        test_vld1_p64_x3(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
        test_vld1_p64_x4(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);

        test_vld1q_p64_x2(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
        test_vld1q_p64_x3(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
        test_vld1q_p64_x4(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
    }

    // Generates one round-trip test per entry, gated on `neon` only.
    macro_rules! wide_store_load_roundtrip_neon {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    // `vst1*_xN` / `vld1*_xN` round trips.
    wide_store_load_roundtrip_neon! {
        test_vld1_f32_x2(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
        test_vld1_f32_x3(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
        test_vld1_f32_x4(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);

        test_vld1q_f32_x2(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
        test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
        test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);

        test_vld1_f64_x2(f64, 2, float64x1x2_t, vst1_f64_x2, vld1_f64_x2);
        test_vld1_f64_x3(f64, 3, float64x1x3_t, vst1_f64_x3, vld1_f64_x3);
        test_vld1_f64_x4(f64, 4, float64x1x4_t, vst1_f64_x4, vld1_f64_x4);

        test_vld1q_f64_x2(f64, 4, float64x2x2_t, vst1q_f64_x2, vld1q_f64_x2);
        test_vld1q_f64_x3(f64, 6, float64x2x3_t, vst1q_f64_x3, vld1q_f64_x3);
        test_vld1q_f64_x4(f64, 8, float64x2x4_t, vst1q_f64_x4, vld1q_f64_x4);

        test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
        test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
        test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);

        test_vld1q_s8_x2(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
        test_vld1q_s8_x3(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
        test_vld1q_s8_x4(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);

        test_vld1_s16_x2(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
        test_vld1_s16_x3(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
        test_vld1_s16_x4(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);

        test_vld1q_s16_x2(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
        test_vld1q_s16_x3(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
        test_vld1q_s16_x4(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);

        test_vld1_s32_x2(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
        test_vld1_s32_x3(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
        test_vld1_s32_x4(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);

        test_vld1q_s32_x2(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
        test_vld1q_s32_x3(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
        test_vld1q_s32_x4(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);

        test_vld1_s64_x2(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
        test_vld1_s64_x3(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
        test_vld1_s64_x4(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);

        test_vld1q_s64_x2(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
        test_vld1q_s64_x3(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
        test_vld1q_s64_x4(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);

        test_vld1_u8_x2(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
        test_vld1_u8_x3(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
        test_vld1_u8_x4(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);

        test_vld1q_u8_x2(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
        test_vld1q_u8_x3(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
        test_vld1q_u8_x4(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);

        test_vld1_u16_x2(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
        test_vld1_u16_x3(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
        test_vld1_u16_x4(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);

        test_vld1q_u16_x2(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
        test_vld1q_u16_x3(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
        test_vld1q_u16_x4(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);

        test_vld1_u32_x2(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
        test_vld1_u32_x3(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
        test_vld1_u32_x4(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);

        test_vld1q_u32_x2(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
        test_vld1q_u32_x3(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
        test_vld1q_u32_x4(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);

        test_vld1_u64_x2(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
        test_vld1_u64_x3(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
        test_vld1_u64_x4(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);

        test_vld1q_u64_x2(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
        test_vld1q_u64_x3(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
        test_vld1q_u64_x4(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);

        test_vld1_p8_x2(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
        test_vld1_p8_x3(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
        test_vld1_p8_x4(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);

        test_vld1q_p8_x2(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
        test_vld1q_p8_x3(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
        test_vld1q_p8_x4(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);

        test_vld1_p16_x2(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
        test_vld1_p16_x3(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
        test_vld1_p16_x4(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);

        test_vld1q_p16_x2(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
        test_vld1q_p16_x3(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
        test_vld1q_p16_x4(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
    }

    // `vst2/3/4*` / `vld2/3/4*` round trips.
    wide_store_load_roundtrip_neon! {
        test_vld2_f32(f32, 4, float32x2x2_t, vst2_f32, vld2_f32);
        test_vld3_f32(f32, 6, float32x2x3_t, vst3_f32, vld3_f32);
        test_vld4_f32(f32, 8, float32x2x4_t, vst4_f32, vld4_f32);

        test_vld2q_f32(f32, 8, float32x4x2_t, vst2q_f32, vld2q_f32);
        test_vld3q_f32(f32, 12, float32x4x3_t, vst3q_f32, vld3q_f32);
        test_vld4q_f32(f32, 16, float32x4x4_t, vst4q_f32, vld4q_f32);

        test_vld2_f64(f64, 2, float64x1x2_t, vst2_f64, vld2_f64);
        test_vld3_f64(f64, 3, float64x1x3_t, vst3_f64, vld3_f64);
        test_vld4_f64(f64, 4, float64x1x4_t, vst4_f64, vld4_f64);

        test_vld2q_f64(f64, 4, float64x2x2_t, vst2q_f64, vld2q_f64);
        test_vld3q_f64(f64, 6, float64x2x3_t, vst3q_f64, vld3q_f64);
        test_vld4q_f64(f64, 8, float64x2x4_t, vst4q_f64, vld4q_f64);

        test_vld2_s8(i8, 16, int8x8x2_t, vst2_s8, vld2_s8);
        test_vld3_s8(i8, 24, int8x8x3_t, vst3_s8, vld3_s8);
        test_vld4_s8(i8, 32, int8x8x4_t, vst4_s8, vld4_s8);

        test_vld2q_s8(i8, 32, int8x16x2_t, vst2q_s8, vld2q_s8);
        test_vld3q_s8(i8, 48, int8x16x3_t, vst3q_s8, vld3q_s8);
        test_vld4q_s8(i8, 64, int8x16x4_t, vst4q_s8, vld4q_s8);

        test_vld2_s16(i16, 8, int16x4x2_t, vst2_s16, vld2_s16);
        test_vld3_s16(i16, 12, int16x4x3_t, vst3_s16, vld3_s16);
        test_vld4_s16(i16, 16, int16x4x4_t, vst4_s16, vld4_s16);

        test_vld2q_s16(i16, 16, int16x8x2_t, vst2q_s16, vld2q_s16);
        test_vld3q_s16(i16, 24, int16x8x3_t, vst3q_s16, vld3q_s16);
        test_vld4q_s16(i16, 32, int16x8x4_t, vst4q_s16, vld4q_s16);

        test_vld2_s32(i32, 4, int32x2x2_t, vst2_s32, vld2_s32);
        test_vld3_s32(i32, 6, int32x2x3_t, vst3_s32, vld3_s32);
        test_vld4_s32(i32, 8, int32x2x4_t, vst4_s32, vld4_s32);

        test_vld2q_s32(i32, 8, int32x4x2_t, vst2q_s32, vld2q_s32);
        test_vld3q_s32(i32, 12, int32x4x3_t, vst3q_s32, vld3q_s32);
        test_vld4q_s32(i32, 16, int32x4x4_t, vst4q_s32, vld4q_s32);

        test_vld2_s64(i64, 2, int64x1x2_t, vst2_s64, vld2_s64);
        test_vld3_s64(i64, 3, int64x1x3_t, vst3_s64, vld3_s64);
        test_vld4_s64(i64, 4, int64x1x4_t, vst4_s64, vld4_s64);

        test_vld2q_s64(i64, 4, int64x2x2_t, vst2q_s64, vld2q_s64);
        test_vld3q_s64(i64, 6, int64x2x3_t, vst3q_s64, vld3q_s64);
        test_vld4q_s64(i64, 8, int64x2x4_t, vst4q_s64, vld4q_s64);

        test_vld2_u8(u8, 16, uint8x8x2_t, vst2_u8, vld2_u8);
        test_vld3_u8(u8, 24, uint8x8x3_t, vst3_u8, vld3_u8);
        test_vld4_u8(u8, 32, uint8x8x4_t, vst4_u8, vld4_u8);

        test_vld2q_u8(u8, 32, uint8x16x2_t, vst2q_u8, vld2q_u8);
        test_vld3q_u8(u8, 48, uint8x16x3_t, vst3q_u8, vld3q_u8);
        test_vld4q_u8(u8, 64, uint8x16x4_t, vst4q_u8, vld4q_u8);

        test_vld2_u16(u16, 8, uint16x4x2_t, vst2_u16, vld2_u16);
        test_vld3_u16(u16, 12, uint16x4x3_t, vst3_u16, vld3_u16);
        test_vld4_u16(u16, 16, uint16x4x4_t, vst4_u16, vld4_u16);

        test_vld2q_u16(u16, 16, uint16x8x2_t, vst2q_u16, vld2q_u16);
        test_vld3q_u16(u16, 24, uint16x8x3_t, vst3q_u16, vld3q_u16);
        test_vld4q_u16(u16, 32, uint16x8x4_t, vst4q_u16, vld4q_u16);

        test_vld2_u32(u32, 4, uint32x2x2_t, vst2_u32, vld2_u32);
        test_vld3_u32(u32, 6, uint32x2x3_t, vst3_u32, vld3_u32);
        test_vld4_u32(u32, 8, uint32x2x4_t, vst4_u32, vld4_u32);

        test_vld2q_u32(u32, 8, uint32x4x2_t, vst2q_u32, vld2q_u32);
        test_vld3q_u32(u32, 12, uint32x4x3_t, vst3q_u32, vld3q_u32);
        test_vld4q_u32(u32, 16, uint32x4x4_t, vst4q_u32, vld4q_u32);

        test_vld2_u64(u64, 2, uint64x1x2_t, vst2_u64, vld2_u64);
        test_vld3_u64(u64, 3, uint64x1x3_t, vst3_u64, vld3_u64);
        test_vld4_u64(u64, 4, uint64x1x4_t, vst4_u64, vld4_u64);

        test_vld2q_u64(u64, 4, uint64x2x2_t, vst2q_u64, vld2q_u64);
        test_vld3q_u64(u64, 6, uint64x2x3_t, vst3q_u64, vld3q_u64);
        test_vld4q_u64(u64, 8, uint64x2x4_t, vst4q_u64, vld4q_u64);

        test_vld2_p8(p8, 16, poly8x8x2_t, vst2_p8, vld2_p8);
        test_vld3_p8(p8, 24, poly8x8x3_t, vst3_p8, vld3_p8);
        test_vld4_p8(p8, 32, poly8x8x4_t, vst4_p8, vld4_p8);

        test_vld2q_p8(p8, 32, poly8x16x2_t, vst2q_p8, vld2q_p8);
        test_vld3q_p8(p8, 48, poly8x16x3_t, vst3q_p8, vld3q_p8);
        test_vld4q_p8(p8, 64, poly8x16x4_t, vst4q_p8, vld4q_p8);

        test_vld2_p16(p16, 8, poly16x4x2_t, vst2_p16, vld2_p16);
        test_vld3_p16(p16, 12, poly16x4x3_t, vst3_p16, vld3_p16);
        test_vld4_p16(p16, 16, poly16x4x4_t, vst4_p16, vld4_p16);

        test_vld2q_p16(p16, 16, poly16x8x2_t, vst2q_p16, vld2q_p16);
        test_vld3q_p16(p16, 24, poly16x8x3_t, vst3q_p16, vld3q_p16);
        test_vld4q_p16(p16, 32, poly16x8x4_t, vst4q_p16, vld4q_p16);
    }

    // Body of a single-lane store/load round-trip test.
    //
    // Stores lane `$idx` of the vectors in `a` with `$store` into a
    // four-element scratch buffer, then loads it back with `$load` into the
    // same lane of `a`; the result must therefore equal `a`.
    // Must be expanded inside an `unsafe` context.
    macro_rules! lane_wide_store_load_roundtrip {
        ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => {
            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
            let a: $vec_ty = transmute(vals);
            // Four elements: enough for the widest (4-vector) entries below,
            // presumably one element per vector — TODO confirm.
            let mut tmp = [0 as $elem_ty; 4];
            $store::<$idx>(tmp.as_mut_ptr().cast(), a);
            let r: $vec_ty = $load::<$idx>(tmp.as_ptr().cast(), a);
            let out: [$elem_ty; $len] = transmute(r);
            assert_eq!(out, vals);
        };
    }

    // Generates one lane round-trip test per entry, gated on `neon`.
    macro_rules! lane_wide_store_load_roundtrip_neon {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                // Skipped under Miri (reason not recorded here — TODO confirm).
                #[cfg_attr(miri, ignore)]
                #[simd_test(enable = "neon")]
                unsafe fn $name() {
                    lane_wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    lane_wide_store_load_roundtrip_neon! {
        test_vld2q_lane_s8(i8, 32, 15, int8x16x2_t, vst2q_lane_s8, vld2q_lane_s8);
        test_vld3q_lane_s8(i8, 48, 15, int8x16x3_t, vst3q_lane_s8, vld3q_lane_s8);
        test_vld4q_lane_s8(i8, 64, 15, int8x16x4_t, vst4q_lane_s8, vld4q_lane_s8);

        test_vld2q_lane_u8(u8, 32, 15, uint8x16x2_t, vst2q_lane_u8, vld2q_lane_u8);
        test_vld3q_lane_u8(u8, 48, 15, uint8x16x3_t, vst3q_lane_u8, vld3q_lane_u8);
        test_vld4q_lane_u8(u8, 64, 15, uint8x16x4_t, vst4q_lane_u8, vld4q_lane_u8);

        test_vld2_lane_s64(i64, 2, 0, int64x1x2_t, vst2_lane_s64, vld2_lane_s64);
        test_vld3_lane_s64(i64, 3, 0, int64x1x3_t, vst3_lane_s64, vld3_lane_s64);
        test_vld4_lane_s64(i64, 4, 0, int64x1x4_t, vst4_lane_s64, vld4_lane_s64);
        test_vld2q_lane_s64(i64, 4, 1, int64x2x2_t, vst2q_lane_s64, vld2q_lane_s64);
        test_vld3q_lane_s64(i64, 6, 1, int64x2x3_t, vst3q_lane_s64, vld3q_lane_s64);
        test_vld4q_lane_s64(i64, 8, 1, int64x2x4_t, vst4q_lane_s64, vld4q_lane_s64);

        test_vld2_lane_u64(u64, 2, 0, uint64x1x2_t, vst2_lane_u64, vld2_lane_u64);
        test_vld3_lane_u64(u64, 3, 0, uint64x1x3_t, vst3_lane_u64, vld3_lane_u64);
        test_vld4_lane_u64(u64, 4, 0, uint64x1x4_t, vst4_lane_u64, vld4_lane_u64);
        test_vld2q_lane_u64(u64, 4, 1, uint64x2x2_t, vst2q_lane_u64, vld2q_lane_u64);
        test_vld3q_lane_u64(u64, 6, 1, uint64x2x3_t, vst3q_lane_u64, vld3q_lane_u64);
        test_vld4q_lane_u64(u64, 8, 1, uint64x2x4_t, vst4q_lane_u64, vld4q_lane_u64);
    }
}
1096
1097#[cfg(test)]
1098#[path = "../../arm_shared/neon/table_lookup_tests.rs"]
1099mod table_lookup_tests;
1100
1101#[cfg(test)]
1102#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"]
1103mod shift_and_insert_tests;
1104
1105#[cfg(test)]
1106#[path = "../../arm_shared/neon/load_tests.rs"]
1107mod load_tests;
1108
1109#[cfg(test)]
1110#[path = "../../arm_shared/neon/store_tests.rs"]
1111mod store_tests;