aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorBilyan Borisov <bilyan.borisov@arm.com>2015-11-22 15:15:20 +0000
committerJames Greenhalgh <jgreenhalgh@gcc.gnu.org>2015-11-22 15:15:20 +0000
commit9030a4d3aa90c1db1a657fe5588c823f0ea73a86 (patch)
tree81889465952aa9181526beb88c2294298bc9b26d /gcc
parent60bcff01121db11277c1ae8419ddda8f014b6196 (diff)
downloadgcc-9030a4d3aa90c1db1a657fe5588c823f0ea73a86.zip
gcc-9030a4d3aa90c1db1a657fe5588c823f0ea73a86.tar.gz
gcc-9030a4d3aa90c1db1a657fe5588c823f0ea73a86.tar.bz2
[AARCH64][PATCH 2/3] Implementing vmulx_lane NEON intrinsic variants
gcc/ * config/aarch64/arm_neon.h (vmulx_lane_f32): New. (vmulx_lane_f64): Likewise. (vmulxq_lane_f32): Refactored & moved. (vmulxq_lane_f64): Likewise. (vmulx_laneq_f32): New. (vmulx_laneq_f64): Likewise. (vmulxq_laneq_f32): Likewise. (vmulxq_laneq_f64): Likewise. (vmulxs_lane_f32): Likewise. (vmulxs_laneq_f32): Likewise. (vmulxd_lane_f64): Likewise. (vmulxd_laneq_f64): Likewise. * config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_<vswap_width_name><mode>, VDQSF): New pattern. (*aarch64_mulx_elt<mode>, VDQF): Likewise. (*aarch64_mulx_elt_to_64v2df): Likewise. (*aarch64_vgetfmulx<mode>, VDQF_DF): Likewise. gcc/testsuite/ * gcc.target/aarch64/simd/vmulx_lane_f32_1.c: New. * gcc.target/aarch64/simd/vmulx_lane_f64_1.c: New. * gcc.target/aarch64/simd/vmulx_laneq_f32_1.c: New. * gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: New. * gcc.target/aarch64/simd/vmulxq_lane_f32_1.c: New. * gcc.target/aarch64/simd/vmulxq_lane_f64_1.c: New. * gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c: New. * gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c: New. * gcc.target/aarch64/simd/vmulxs_lane_f32_1.c: New. * gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c: New. * gcc.target/aarch64/simd/vmulxd_lane_f64_1.c: New. * gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c: New. From-SVN: r230720
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog20
-rw-r--r--gcc/config/aarch64/aarch64-simd.md74
-rw-r--r--gcc/config/aarch64/arm_neon.h98
-rw-r--r--gcc/testsuite/ChangeLog15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c70
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c62
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c111
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c76
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c54
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c62
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c79
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c118
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c78
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c85
16 files changed, 1098 insertions, 26 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 00db0ba..51e0707 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2015-11-22 Bilyan Borisov <bilyan.borisov@arm.com>
+
+ * config/aarch64/aarch64-simd.md
+ (*aarch64_mulx_elt_<vswap_width_name><mode>, VDQSF): New.
+ (*aarch64_mulx_elt<mode>, VDQF): Likewise.
+ (*aarch64_mulx_elt_to_64v2df): Likewise.
+ (*aarch64_vgetfmulx<mode>, VDQF_DF): Likewise.
+ * config/aarch64/arm_neon.h (vmulx_lane_f32): New.
+ (vmulx_lane_f64): Likewise.
+ (vmulxq_lane_f32): Refactored & moved.
+ (vmulxq_lane_f64): Likewise.
+ (vmulx_laneq_f32): New.
+ (vmulx_laneq_f64): Likewise.
+ (vmulxq_laneq_f32): Likewise.
+ (vmulxq_laneq_f64): Likewise.
+ (vmulxs_lane_f32): Likewise.
+ (vmulxs_laneq_f32): Likewise.
+ (vmulxd_lane_f64): Likewise.
+ (vmulxd_laneq_f64): Likewise.
+
2015-11-21 Jan Hubicka <hubicka@ucw.cz>
* tree.c (build_pointer_type_for_mode,build_reference_type_for_mode):
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 0fb8134..3fa23b3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2907,6 +2907,80 @@
[(set_attr "type" "neon_fp_mul_<Vetype>")]
)
+;; vmulxq_lane_f32, and vmulx_laneq_f32
+
+(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand" "w")
+ (vec_duplicate:VDQSF
+ (vec_select:<VEL>
+ (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
+ UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
+ INTVAL (operands[3])));
+ return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
+)
+
+;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
+
+(define_insn "*aarch64_mulx_elt<mode>"
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (unspec:VDQF
+ [(match_operand:VDQF 1 "register_operand" "w")
+ (vec_duplicate:VDQF
+ (vec_select:<VEL>
+ (match_operand:VDQF 2 "register_operand" "w")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
+ UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
+)
+
+;; vmulxq_lane_f64
+
+(define_insn "*aarch64_mulx_elt_to_64v2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
+ (unspec:V2DF
+ [(match_operand:V2DF 1 "register_operand" "w")
+ (vec_duplicate:V2DF
+ (match_operand:DF 2 "register_operand" "w"))]
+ UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ {
+ return "fmulx\t%0.2d, %1.2d, %2.d[0]";
+ }
+ [(set_attr "type" "neon_fp_mul_d_scalar_q")]
+)
+
+;; vmulxs_lane_f32, vmulxs_laneq_f32
+;; vmulxd_lane_f64 == vmulx_lane_f64
+;; vmulxd_laneq_f64 == vmulx_laneq_f64
+
+(define_insn "*aarch64_vgetfmulx<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL>
+ [(match_operand:<VEL> 1 "register_operand" "w")
+ (vec_select:<VEL>
+ (match_operand:VDQF_DF 2 "register_operand" "w")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+ UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+ return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "fmul<Vetype>")]
+)
;; <su>q<addsub>
(define_insn "aarch64_<su_optab><optab><mode>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7f69043d..138b108 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8509,32 +8509,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b)
return result;
}
-#define vmulxq_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x4_t a_ = (a); \
- float32x4_t result; \
- __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulxq_lane_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x2_t b_ = (b); \
- float64x2_t a_ = (a); \
- float64x2_t result; \
- __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
@@ -17270,6 +17244,78 @@ vmulxd_f64 (float64_t __a, float64_t __b)
return __builtin_aarch64_fmulxdf (__a, __b);
}
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane)
+{
+ return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane)
+{
+ return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane)
+{
+ return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane)
+{
+ return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane)
+{
+ return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane)
+{
+ return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane)
+{
+ return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane)
+{
+ return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane)
+{
+ return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane)
+{
+ return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane)
+{
+ return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane)
+{
+ return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
/* vpmax */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 10ef5c2..3c1a953 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,18 @@
+2015-11-22 Bilyan Borisov <bilyan.borisov@arm.com>
+
+ * gcc.target/aarch64/simd/vmulx_lane_f32_1.c: New.
+ * gcc.target/aarch64/simd/vmulx_lane_f64_1.c: New.
+ * gcc.target/aarch64/simd/vmulx_laneq_f32_1.c: New.
+ * gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: New.
+ * gcc.target/aarch64/simd/vmulxq_lane_f32_1.c: New.
+ * gcc.target/aarch64/simd/vmulxq_lane_f64_1.c: New.
+ * gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c: New.
+ * gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c: New.
+ * gcc.target/aarch64/simd/vmulxs_lane_f32_1.c: New.
+ * gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c: New.
+ * gcc.target/aarch64/simd/vmulxd_lane_f64_1.c: New.
+ * gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c: New.
+
2015-11-21 Steven G. Kargl <kargl@gcc.gnu.org>
* gfortran.dg/simplify_cshift_1.f90: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c
new file mode 100644
index 0000000..4f80678
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c
@@ -0,0 +1,70 @@
+/* Test the vmulx_lane_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_lane0_f32 (float32x2_t vec1_1, float32x2_t vec1_2)
+{
+ return vmulx_lane_f32 (vec1_1, vec1_2, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_lane1_f32 (float32x2_t vec1_1, float32x2_t vec1_2)
+{
+ return vmulx_lane_f32 (vec1_1, vec1_2, 1);
+}
+
+void
+test_case (float32_t v1[2], float32_t v2[2], float32_t e1[2], float32_t e2[2])
+{
+ int i;
+ float32x2_t vec1_1 = vld1_f32 (v1);
+ float32x2_t vec1_2 = vld1_f32 (v2);
+
+
+ float32x2_t actual1 = test_vmulx_lane0_f32 (vec1_1, vec1_2);
+ float32_t actual1_1[2];
+ vst1_f32 (actual1_1, actual1);
+
+ for (i = 0; i < 2; ++i)
+ if (actual1_1[i] != e1[i])
+ abort ();
+
+ float32x2_t actual2 = test_vmulx_lane1_f32 (vec1_1, vec1_2);
+ float32_t actual2_1[2];
+ vst1_f32 (actual2_1, actual2);
+
+ for (i = 0; i < 2; ++i)
+ if (actual2_1[i] != e2[i])
+ abort ();
+}
+
+int
+main (void)
+{
+ float32_t v1 = 3.14159265359;
+ float32_t v2 = 1.383894;
+ float32_t v3 = -2.71828;
+ float32_t v4 = -3.4891931;
+
+ float32_t v1_1[] = {v1, v2};
+ float32_t v1_2[] = {v3, v4};
+ float32_t e1_1[] = {v1 * v3, v2 * v3};
+ float32_t e1_2[] = {v1 * v4, v2 * v4};
+ test_case (v1_1, v1_2, e1_1, e1_2);
+
+ float32_t v2_1[] = {0, -0.0};
+ float32_t v2_2[] = {__builtin_huge_valf (), -__builtin_huge_valf ()};
+ float32_t e2_1[] = {2.0, -2.0};
+ float32_t e2_2[] = {-2.0, 2.0};
+ test_case (v2_1, v2_2, e2_1, e2_2);
+
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c
new file mode 100644
index 0000000..0ebdb96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c
@@ -0,0 +1,62 @@
+/* Test the vmulx_lane_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float64x1_t __attribute__ ((noinline))
+test_vmulx_lane_f64 (float64x1_t vec1_1, float64x1_t vec1_2)
+{
+ return vmulx_lane_f64 (vec1_1, vec1_2, 0);
+}
+
+void
+test_case (float64_t v1[], float64_t v2[], float64_t e[])
+{
+ float64x1_t vec1_1 = vld1_f64 (v1);
+ float64x1_t vec1_2 = vld1_f64 (v2);
+ float64x1_t expected1 = vld1_f64 (e);
+
+ float64x1_t actual1 = test_vmulx_lane_f64 (vec1_1, vec1_2);
+ float64_t actual[1];
+ vst1_f64 (actual, actual1);
+ if (actual[0] != e[0])
+ abort ();
+}
+int
+main (void)
+{
+ float64_t v1 = 3.14159265359;
+ float64_t v2 = -2.71828;
+
+ float64_t v1_1[] = {v1};
+ float64_t v1_2[] = {v2};
+ float64_t e1[] = {v1 * v2};
+ test_case (v1_1, v1_2, e1);
+
+ float64_t v2_1[] = {0};
+ float64_t v2_2[] = {__builtin_huge_val ()};
+ float64_t e2[] = {2.0};
+ test_case (v2_1, v2_2, e2);
+
+ float64_t v4_1[] = {0};
+ float64_t v4_2[] = {-__builtin_huge_val ()};
+ float64_t e4[] = {-2.0};
+ test_case (v4_1, v4_2, e4);
+
+ float64_t v5_1[] = {-0.0};
+ float64_t v5_2[] = {__builtin_huge_val ()};
+ float64_t e5[] = {-2.0};
+ test_case (v5_1, v5_2, e5);
+
+ float64_t v6_1[] = {-0.0};
+ float64_t v6_2[] = {-__builtin_huge_val ()};
+ float64_t e6[] = {2.0};
+ test_case (v6_1, v6_2, e6);
+
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c
new file mode 100644
index 0000000..3e968b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c
@@ -0,0 +1,111 @@
+/* Test the vmulx_laneq_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_laneq_f32_lane0 (float32x2_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulx_laneq_f32 (vec1_1, vec1_2, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_laneq_f32_lane1 (float32x2_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulx_laneq_f32 (vec1_1, vec1_2, 1);
+}
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_laneq_f32_lane2 (float32x2_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulx_laneq_f32 (vec1_1, vec1_2, 2);
+}
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_laneq_f32_lane3 (float32x2_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulx_laneq_f32 (vec1_1, vec1_2, 3);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, EXP2, EXP3, I) \
+ void set_and_test_case##I () \
+ { \
+ float32_t vec1_data[] = V1_D; \
+ float32x2_t vec1 = vld1_f32 (vec1_data); \
+ float32_t vec2_data[] = V2_D; \
+ float32x4_t vec2 = vld1q_f32 (vec2_data); \
+ \
+ float32_t expected_lane0[] = EXP0; \
+ float32_t expected_lane1[] = EXP1; \
+ float32_t expected_lane2[] = EXP2; \
+ float32_t expected_lane3[] = EXP3; \
+ \
+ float32x2_t actual_lane0_v = \
+ test_vmulx_laneq_f32_lane0 (vec1, vec2); \
+ float32_t actual_lane0[2]; \
+ vst1_f32 (actual_lane0, actual_lane0_v); \
+ if (actual_lane0[0] != expected_lane0[0] \
+ || actual_lane0[1] != expected_lane0[1]) \
+ abort (); \
+ \
+ float32x2_t actual_lane1_v = \
+ test_vmulx_laneq_f32_lane1 (vec1, vec2); \
+ float32_t actual_lane1[2]; \
+ vst1_f32 (actual_lane1, actual_lane1_v); \
+ if (actual_lane1[0] != expected_lane1[0] \
+ || actual_lane1[1] != expected_lane1[1]) \
+ abort (); \
+ \
+ float32x2_t actual_lane2_v = \
+ test_vmulx_laneq_f32_lane2 (vec1, vec2); \
+ float32_t actual_lane2[2]; \
+ vst1_f32 (actual_lane2, actual_lane2_v); \
+ if (actual_lane2[0] != expected_lane2[0] \
+ || actual_lane2[1] != expected_lane2[1]) \
+ abort (); \
+ \
+ float32x2_t actual_lane3_v = \
+ test_vmulx_laneq_f32_lane3 (vec1, vec2); \
+ float32_t actual_lane3[2]; \
+ vst1_f32 (actual_lane3, actual_lane3_v); \
+ if (actual_lane3[0] != expected_lane3[0] \
+ || actual_lane3[1] != expected_lane3[1]) \
+ abort (); \
+ \
+ } \
+
+float32_t v1 = 3.14159265359;
+float32_t v2 = 1.383894;
+float32_t v3 = -2.71828;
+float32_t v4 = -3.4891931;
+
+float32_t v5 = 0.0;
+float32_t v6 = -0.0;
+float32_t v7 = __builtin_huge_valf ();
+float32_t v8 = -__builtin_huge_valf ();
+
+SETUP_VEC (PASS_ARRAY (v1, v2), PASS_ARRAY (v1, v2, v3, v4),
+ PASS_ARRAY (v1*v1, v1*v2), PASS_ARRAY (v1*v2, v2*v2),
+ PASS_ARRAY (v1*v3, v2*v3), PASS_ARRAY (v1*v4, v2*v4), 1)
+
+SETUP_VEC (PASS_ARRAY (v5, v6), PASS_ARRAY (v5, v6, v7, v8),
+ PASS_ARRAY (0.0, -0.0), PASS_ARRAY (-0.0, 0.0),
+ PASS_ARRAY (2.0, -2.0), PASS_ARRAY (-2.0, 2.0), 2)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c
new file mode 100644
index 0000000..db79d53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c
@@ -0,0 +1,76 @@
+/* Test the vmulx_laneq_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float64x1_t __attribute__ ((noinline))
+test_vmulx_laneq_f64_lane0 (float64x1_t vec1_1, float64x2_t vec1_2)
+{
+ return vmulx_laneq_f64 (vec1_1, vec1_2, 0);
+}
+
+float64x1_t __attribute__ ((noinline))
+test_vmulx_laneq_f64_lane1 (float64x1_t vec1_1, float64x2_t vec1_2)
+{
+ return vmulx_laneq_f64 (vec1_1, vec1_2, 1);
+}
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, I) \
+ void set_and_test_case##I () \
+ { \
+ float64_t vec1_data[] = V1_D; \
+ float64x1_t vec1 = vld1_f64 (vec1_data); \
+ float64_t vec2_data[] = V2_D; \
+ float64x2_t vec2 = vld1q_f64 (vec2_data); \
+ float64_t expected_lane0[] = EXP1; \
+ float64_t expected_lane1[] = EXP2; \
+ \
+ float64x1_t actual_lane0_v = \
+ test_vmulx_laneq_f64_lane0 (vec1, vec2); \
+ float64_t actual_lane0[1]; \
+ vst1_f64 (actual_lane0, actual_lane0_v); \
+ if (actual_lane0[0] != expected_lane0[0]) \
+ abort (); \
+ \
+ float64x1_t actual_lane1_v = \
+ test_vmulx_laneq_f64_lane1 (vec1, vec2); \
+ float64_t actual_lane1[1]; \
+ vst1_f64 (actual_lane1, actual_lane1_v); \
+ if (actual_lane1[0] != expected_lane1[0]) \
+ abort (); \
+ } \
+
+float64_t v1 = 3.14159265359;
+float64_t v2 = 1.383894;
+float64_t v3 = -2.71828;
+
+float64_t v4 = 0.0;
+float64_t v5 = __builtin_huge_val ();
+float64_t v6 = -__builtin_huge_val ();
+
+float64_t v7 = -0.0;
+float64_t v8 = __builtin_huge_val ();
+float64_t v9 = -__builtin_huge_val ();
+
+SETUP_VEC (PASS_ARRAY (v1), PASS_ARRAY (v2, v3), PASS_ARRAY (v1*v2),
+ PASS_ARRAY (v1*v3), 1)
+SETUP_VEC (PASS_ARRAY (v4), PASS_ARRAY (v5, v6), PASS_ARRAY (2.0),
+ PASS_ARRAY (-2.0), 2)
+SETUP_VEC (PASS_ARRAY (v7), PASS_ARRAY (v8, v9), PASS_ARRAY (-2.0),
+ PASS_ARRAY (2.0), 3)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ set_and_test_case3 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c
new file mode 100644
index 0000000..b0bf180
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c
@@ -0,0 +1,54 @@
+/* Test the vmulxd_lane_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float64_t __attribute__ ((noinline))
+test_vmulxd_lane_f64_lane0 (float64_t vec1_1, float64x1_t vec1_2)
+{
+ return vmulxd_lane_f64 (vec1_1, vec1_2, 0);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP1, I) \
+ void set_and_test_case##I () \
+ { \
+ float64_t vec1 = V1_D; \
+ float64_t vec2_data[] = V2_D; \
+ float64x1_t vec2 = vld1_f64 (vec2_data); \
+ float64_t expected_lane0 = EXP1; \
+ float64_t actual_lane0 = test_vmulxd_lane_f64_lane0 (vec1, vec2); \
+ if (actual_lane0 != expected_lane0) \
+ abort (); \
+ } \
+
+float64_t v1 = 3.14159265359;
+float64_t v2 = 1.383894;
+
+float64_t v4 = 0.0;
+float64_t v5 = -0.0;
+float64_t v6 = __builtin_huge_val ();
+float64_t v7 = -__builtin_huge_val ();
+
+SETUP_VEC (v1, PASS_ARRAY (v2), v1*v2, 1)
+SETUP_VEC (v4, PASS_ARRAY (v6), 2.0, 2)
+SETUP_VEC (v4, PASS_ARRAY (v7), -2.0, 3)
+SETUP_VEC (v5, PASS_ARRAY (v6), -2.0, 4)
+SETUP_VEC (v5, PASS_ARRAY (v7), 2.0, 5)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ set_and_test_case3 ();
+ set_and_test_case4 ();
+ set_and_test_case5 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?(?:\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]|\[dD\]\[0-9\])\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c
new file mode 100644
index 0000000..3f8303c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c
@@ -0,0 +1,62 @@
+/* Test the vmulxd_laneq_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float64_t __attribute__ ((noinline))
+test_vmulxd_laneq_f64_lane0 (float64_t vec1_1, float64x2_t vec1_2)
+{
+ return vmulxd_laneq_f64 (vec1_1, vec1_2, 0);
+}
+
+float64_t __attribute__ ((noinline))
+test_vmulxd_laneq_f64_lane1 (float64_t vec1_1, float64x2_t vec1_2)
+{
+ return vmulxd_laneq_f64 (vec1_1, vec1_2, 1);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, I) \
+ void set_and_test_case##I () \
+ { \
+ float64_t vec1 = V1_D; \
+ float64_t vec2_data[] = V2_D; \
+ float64x2_t vec2 = vld1q_f64 (vec2_data); \
+ float64_t expected_lane0 = EXP1; \
+ float64_t expected_lane1 = EXP2; \
+ float64_t actual_lane0 = test_vmulxd_laneq_f64_lane0 (vec1, vec2); \
+ if (actual_lane0 != expected_lane0) \
+ abort (); \
+ float64_t actual_lane1 = test_vmulxd_laneq_f64_lane1 (vec1, vec2); \
+ if (actual_lane1 != expected_lane1) \
+ abort (); \
+ } \
+
+float64_t v1 = 3.14159265359;
+float64_t v2 = 1.383894;
+float64_t v3 = -2.71828;
+
+float64_t v4 = 0.0;
+float64_t v5 = -0.0;
+float64_t v6 = __builtin_huge_val ();
+float64_t v7 = -__builtin_huge_val ();
+
+SETUP_VEC (v1, PASS_ARRAY (v2, v3), v1*v2, v1*v3, 1)
+SETUP_VEC (v4, PASS_ARRAY (v6, v7), 2.0, -2.0, 2)
+SETUP_VEC (v5, PASS_ARRAY (v6, v7), -2.0, 2.0, 3)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ set_and_test_case3 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c
new file mode 100644
index 0000000..b5f5860
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c
@@ -0,0 +1,79 @@
+/* Test the vmulxq_lane_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32x4_t __attribute__ ((noinline))
+test_vmulxq_lane_f32_lane0 (float32x4_t vec1_1, float32x2_t vec1_2)
+{
+ return vmulxq_lane_f32 (vec1_1, vec1_2, 0);
+}
+
+float32x4_t __attribute__ ((noinline))
+test_vmulxq_lane_f32_lane1 (float32x4_t vec1_1, float32x2_t vec1_2)
+{
+ return vmulxq_lane_f32 (vec1_1, vec1_2, 1);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, I) \
+ void set_and_test_case##I () \
+ { \
+ int i; \
+ float32_t vec1_data[] = V1_D; \
+ float32x4_t vec1 = vld1q_f32 (vec1_data); \
+ float32_t vec2_data[] = V2_D; \
+ float32x2_t vec2 = vld1_f32 (vec2_data); \
+ \
+ float32_t expected_lane0[] = EXP0; \
+ float32_t expected_lane1[] = EXP1; \
+ \
+ float32x4_t actual_lane0_v = \
+ test_vmulxq_lane_f32_lane0 (vec1, vec2); \
+ float32_t actual_lane0[4]; \
+ vst1q_f32 (actual_lane0, actual_lane0_v); \
+ for (i = 0; i < 4; ++i) \
+ if (actual_lane0[i] != expected_lane0[i]) \
+ abort (); \
+ \
+ float32x4_t actual_lane1_v = \
+ test_vmulxq_lane_f32_lane1 (vec1, vec2); \
+ float32_t actual_lane1[4]; \
+ vst1q_f32 (actual_lane1, actual_lane1_v); \
+ for (i = 0; i < 4; ++i) \
+ if (actual_lane1[i] != expected_lane1[i]) \
+ abort (); \
+ } \
+
+float32_t v1 = 3.14159265359;
+float32_t v2 = 1.383894;
+float32_t v3 = -2.71828;
+float32_t v4 = -3.4891931;
+
+float32_t v5 = 0.0;
+float32_t v6 = -0.0;
+float32_t v7 = __builtin_huge_valf ();
+float32_t v8 = -__builtin_huge_valf ();
+
+SETUP_VEC (PASS_ARRAY (v1, v2, v3, v4), PASS_ARRAY (v1, v2),
+ PASS_ARRAY (v1*v1, v2*v1, v3*v1, v4*v1),
+ PASS_ARRAY (v1*v2, v2*v2, v3*v2, v4*v2), 1)
+
+SETUP_VEC (PASS_ARRAY (v5, v6, v7, v8), PASS_ARRAY (v5, v6),
+ PASS_ARRAY (0.0, -0.0, 2.0, -2.0),
+ PASS_ARRAY (-0.0, 0.0, -2.0, 2.0), 2)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c
new file mode 100644
index 0000000..703ec6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c
@@ -0,0 +1,61 @@
+/* Test the vmulxq_lane_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float64x2_t __attribute__ ((noinline))
+test_vmulxq_lane_f64_lane0 (float64x2_t vec1_1, float64x1_t vec1_2)
+{
+ return vmulxq_lane_f64 (vec1_1, vec1_2, 0);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP0, I) \
+ void set_and_test_case##I () \
+ { \
+ int i; \
+ float64_t vec1_data[] = V1_D; \
+ float64x2_t vec1 = vld1q_f64 (vec1_data); \
+ float64_t vec2_data[] = V2_D; \
+ float64x1_t vec2 = vld1_f64 (vec2_data); \
+ \
+ float64_t expected_lane0[] = EXP0; \
+ float64x2_t actual_lane0_v \
+ = test_vmulxq_lane_f64_lane0 (vec1, vec2); \
+ float64_t actual_lane0[2]; \
+ vst1q_f64 (actual_lane0, actual_lane0_v); \
+ for (i = 0; i < 1; ++i) \
+ if (actual_lane0[i] != expected_lane0[i]) \
+ abort (); \
+ } \
+
+float64_t v1 = 3.14159265359;
+float64_t v2 = 1.383894;
+
+float64_t v3 = __builtin_huge_val ();
+float64_t v4 = -__builtin_huge_val ();
+
+float64_t v5 = 0.0;
+float64_t v6 = -0.0;
+
+
+SETUP_VEC (PASS_ARRAY (v1, v2), PASS_ARRAY (v1), PASS_ARRAY (v1*v1, v2*v1), 1)
+
+SETUP_VEC (PASS_ARRAY (v3, v4), PASS_ARRAY (v5), PASS_ARRAY (2.0, -2.0), 2)
+
+SETUP_VEC (PASS_ARRAY (v3, v4), PASS_ARRAY (v6), PASS_ARRAY (-2.0, 2.0), 3)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ set_and_test_case3 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c
new file mode 100644
index 0000000..264c0c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c
@@ -0,0 +1,118 @@
+/* Test the vmulxq_laneq_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32x4_t __attribute__ ((noinline))
+test_vmulxq_laneq_f32_lane0 (float32x4_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxq_laneq_f32 (vec1_1, vec1_2, 0);
+}
+
+float32x4_t __attribute__ ((noinline))
+test_vmulxq_laneq_f32_lane1 (float32x4_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxq_laneq_f32 (vec1_1, vec1_2, 1);
+}
+
+float32x4_t __attribute__ ((noinline))
+test_vmulxq_laneq_f32_lane2 (float32x4_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxq_laneq_f32 (vec1_1, vec1_2, 2);
+}
+
+float32x4_t __attribute__ ((noinline))
+test_vmulxq_laneq_f32_lane3 (float32x4_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxq_laneq_f32 (vec1_1, vec1_2, 3);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, EXP2, EXP3, I) \
+ void set_and_test_case##I () \
+ { \
+ int i; \
+ float32_t vec1_data[] = V1_D; \
+ float32x4_t vec1 = vld1q_f32 (vec1_data); \
+ float32_t vec2_data[] = V2_D; \
+ float32x4_t vec2 = vld1q_f32 (vec2_data); \
+ \
+ float32_t expected_lane0[] = EXP0; \
+ float32_t expected_lane1[] = EXP1; \
+ float32_t expected_lane2[] = EXP2; \
+ float32_t expected_lane3[] = EXP3; \
+ \
+ float32x4_t actual_lane0_v = \
+ test_vmulxq_laneq_f32_lane0 (vec1, vec2); \
+ float32_t actual_lane0[4]; \
+ vst1q_f32 (actual_lane0, actual_lane0_v); \
+ for (i = 0; i < 4; ++i) \
+ if (actual_lane0[i] != expected_lane0[i]) \
+ abort (); \
+ \
+ float32x4_t actual_lane1_v = \
+ test_vmulxq_laneq_f32_lane1 (vec1, vec2); \
+ float32_t actual_lane1[4]; \
+ vst1q_f32 (actual_lane1, actual_lane1_v); \
+ for (i = 0; i < 4; ++i) \
+ if (actual_lane1[i] != expected_lane1[i]) \
+ abort (); \
+ \
+ float32x4_t actual_lane2_v = \
+ test_vmulxq_laneq_f32_lane2 (vec1, vec2); \
+ float32_t actual_lane2[4]; \
+ vst1q_f32 (actual_lane2, actual_lane2_v); \
+ for (i = 0; i < 4; ++i) \
+ if (actual_lane2[i] != expected_lane2[i]) \
+ abort (); \
+ \
+ float32x4_t actual_lane3_v = \
+ test_vmulxq_laneq_f32_lane3 (vec1, vec2); \
+ float32_t actual_lane3[4]; \
+ vst1q_f32 (actual_lane3, actual_lane3_v); \
+ for (i = 0; i < 4; ++i) \
+ if (actual_lane3[i] != expected_lane3[i]) \
+ abort (); \
+ } \
+
+float32_t v1 = 3.14159265359;
+float32_t v2 = 1.383894;
+float32_t v3 = -2.71828;
+float32_t v4 = -3.4891931;
+
+float32_t v5 = 0.0;
+float32_t v6 = -0.0;
+float32_t v7 = __builtin_huge_valf ();
+float32_t v8 = -__builtin_huge_valf ();
+
+float32_t spec = __builtin_huge_valf () * __builtin_huge_valf ();
+float32_t spec_n = -__builtin_huge_valf () * __builtin_huge_valf ();
+
+SETUP_VEC (PASS_ARRAY (v1, v2, v3, v4), PASS_ARRAY (v1, v2, v3, v4),
+ PASS_ARRAY (v1*v1, v1*v2, v1*v3, v1*v4),
+ PASS_ARRAY (v1*v2, v2*v2, v2*v3, v2*v4),
+ PASS_ARRAY (v1*v3, v2*v3, v3*v3, v4*v3),
+ PASS_ARRAY (v1*v4, v2*v4, v3*v4, v4*v4), 1)
+
+SETUP_VEC (PASS_ARRAY (v5, v6, v7, v8), PASS_ARRAY (v5, v6, v7, v8),
+ PASS_ARRAY (0.0, -0.0, 2.0, -2.0),
+ PASS_ARRAY (-0.0, 0.0, -2.0, 2.0),
+ PASS_ARRAY (2.0, -2.0, spec, spec_n),
+ PASS_ARRAY (-2.0, 2.0, spec_n, spec), 2)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c
new file mode 100644
index 0000000..14e9852
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c
@@ -0,0 +1,78 @@
+/* Test the vmulxq_laneq_f64 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float64x2_t __attribute__ ((noinline))
+test_vmulxq_laneq_f64_lane0 (float64x2_t vec1_1, float64x2_t vec1_2)
+{
+ return vmulxq_laneq_f64 (vec1_1, vec1_2, 0);
+}
+
+float64x2_t __attribute__ ((noinline))
+test_vmulxq_laneq_f64_lane1 (float64x2_t vec1_1, float64x2_t vec1_2)
+{
+ return vmulxq_laneq_f64 (vec1_1, vec1_2, 1);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, I) \
+ void set_and_test_case##I () \
+ { \
+ int i; \
+ float64_t vec1_data[] = V1_D; \
+ float64x2_t vec1 = vld1q_f64 (vec1_data); \
+ float64_t vec2_data[] = V2_D; \
+ float64x2_t vec2 = vld1q_f64 (vec2_data); \
+ \
+ float64_t expected_lane0[] = EXP0; \
+ float64_t expected_lane1[] = EXP1; \
+ \
+ float64x2_t actual_lane0_v = \
+ test_vmulxq_laneq_f64_lane0 (vec1, vec2); \
+ float64_t actual_lane0[2]; \
+ vst1q_f64 (actual_lane0, actual_lane0_v); \
+ for (i = 0; i < 2; ++i) \
+ if (actual_lane0[i] != expected_lane0[i]) \
+ abort (); \
+ \
+ float64x2_t actual_lane1_v = \
+ test_vmulxq_laneq_f64_lane1 (vec1, vec2); \
+ float64_t actual_lane1[2]; \
+ vst1q_f64 (actual_lane1, actual_lane1_v); \
+ for (i = 0; i < 2; ++i) \
+ if (actual_lane1[i] != expected_lane1[i]) \
+ abort (); \
+ \
+ } \
+
+float64_t v1 = 3.14159265359;
+float64_t v2 = 1.383894;
+
+float64_t v3 = 0.0;
+float64_t v4 = -0.0;
+float64_t v5 = __builtin_huge_val ();
+float64_t v6 = -__builtin_huge_val ();
+
+float64_t spec = __builtin_huge_val () * __builtin_huge_val ();
+
+SETUP_VEC (PASS_ARRAY (v1, v2), PASS_ARRAY (v1, v2), PASS_ARRAY (v1*v1, v2*v1),
+ PASS_ARRAY (v1*v2, v2*v2), 1)
+
+SETUP_VEC (PASS_ARRAY (v3, v4), PASS_ARRAY (v5, v6), PASS_ARRAY (2.0, -2.0),
+ PASS_ARRAY (-2.0, 2.0), 2)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c
new file mode 100644
index 0000000..124dcd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c
@@ -0,0 +1,61 @@
+/* Test the vmulxs_lane_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32_t __attribute__ ((noinline))
+test_vmulxs_lane_f32_lane0 (float32_t vec1_1, float32x2_t vec1_2)
+{
+ return vmulxs_lane_f32 (vec1_1, vec1_2, 0);
+}
+
+float32_t __attribute__ ((noinline))
+test_vmulxs_lane_f32_lane1 (float32_t vec1_1, float32x2_t vec1_2)
+{
+ return vmulxs_lane_f32 (vec1_1, vec1_2, 1);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, I) \
+ void set_and_test_case##I () \
+ { \
+ float32_t vec1 = V1_D; \
+ float32_t vec2_data[] = V2_D; \
+ float32x2_t vec2 = vld1_f32 (vec2_data); \
+ float32_t expected_lane0 = EXP1; \
+ float32_t expected_lane1 = EXP2; \
+ float32_t actual_lane0 = test_vmulxs_lane_f32_lane0 (vec1, vec2); \
+ if (actual_lane0 != expected_lane0) \
+ abort (); \
+ float32_t actual_lane1 = test_vmulxs_lane_f32_lane1 (vec1, vec2); \
+ if (actual_lane1 != expected_lane1) \
+ abort (); \
+ } \
+
+float32_t v1 = 3.14159265359;
+float32_t v2 = 1.383894;
+
+float32_t v4 = 0.0;
+float32_t v5 = -0.0;
+float32_t v6 = __builtin_huge_valf ();
+float32_t v7 = -__builtin_huge_valf ();
+
+SETUP_VEC (v1, PASS_ARRAY (v1, v2), v1*v1, v1*v2, 1)
+SETUP_VEC (v4, PASS_ARRAY (v6, v7), 2.0, -2.0, 2)
+SETUP_VEC (v5, PASS_ARRAY (v6, v7), -2.0, 2.0, 3)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ set_and_test_case3 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c
new file mode 100644
index 0000000..255f096
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c
@@ -0,0 +1,85 @@
+/* Test the vmulxs_laneq_f32 AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32_t __attribute__ ((noinline))
+test_vmulxs_laneq_f32_lane0 (float32_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxs_laneq_f32 (vec1_1, vec1_2, 0);
+}
+
+float32_t __attribute__ ((noinline))
+test_vmulxs_laneq_f32_lane1 (float32_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxs_laneq_f32 (vec1_1, vec1_2, 1);
+}
+
+float32_t __attribute__ ((noinline))
+test_vmulxs_laneq_f32_lane2 (float32_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxs_laneq_f32 (vec1_1, vec1_2, 2);
+}
+
+float32_t __attribute__ ((noinline))
+test_vmulxs_laneq_f32_lane3 (float32_t vec1_1, float32x4_t vec1_2)
+{
+ return vmulxs_laneq_f32 (vec1_1, vec1_2, 3);
+}
+
+#define PASS_ARRAY(...) {__VA_ARGS__}
+
+#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, EXP3, EXP4, I) \
+ void set_and_test_case##I () \
+ { \
+ float32_t vec1 = V1_D; \
+ float32_t vec2_data[] = V2_D; \
+ float32x4_t vec2 = vld1q_f32 (vec2_data); \
+ float32_t expected_lane0 = EXP1; \
+ float32_t expected_lane1 = EXP2; \
+ float32_t expected_lane2 = EXP3; \
+ float32_t expected_lane3 = EXP4; \
+ float32_t actual_lane0 = test_vmulxs_laneq_f32_lane0 (vec1, vec2); \
+ if (actual_lane0 != expected_lane0) \
+ abort (); \
+ float32_t actual_lane1 = test_vmulxs_laneq_f32_lane1 (vec1, vec2); \
+ if (actual_lane1 != expected_lane1) \
+ abort (); \
+ float32_t actual_lane2 = test_vmulxs_laneq_f32_lane2 (vec1, vec2); \
+ if (actual_lane2 != expected_lane2) \
+ abort (); \
+ float32_t actual_lane3 = test_vmulxs_laneq_f32_lane3 (vec1, vec2); \
+ if (actual_lane3 != expected_lane3) \
+ abort (); \
+ } \
+
+float32_t v1 = 3.14159265359;
+float32_t v2 = 1.383894;
+float32_t v3 = -2.71828;
+float32_t v4 = -3.4891931;
+
+float32_t v5 = 0.0;
+float32_t v6 = -0.0;
+float32_t v7 = __builtin_huge_valf ();
+float32_t v8 = -__builtin_huge_valf ();
+
+SETUP_VEC (v1, PASS_ARRAY (v1, v2, v3, v4), v1*v1, v1*v2, v3*v1, v1*v4, 1)
+SETUP_VEC (v5, PASS_ARRAY (v5, v6, v7, v8), 0.0, -0.0, 2.0, -2.0, 2)
+SETUP_VEC (v6, PASS_ARRAY (v5, v6, v7, v8), -0.0, 0.0, -2.0, 2.0, 3)
+
+int
+main (void)
+{
+ set_and_test_case1 ();
+ set_and_test_case2 ();
+ set_and_test_case3 ();
+ return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */