author     James Greenhalgh <james.greenhalgh@arm.com>    2013-09-16 09:50:21 +0000
committer  James Greenhalgh <jgreenhalgh@gcc.gnu.org>     2013-09-16 09:50:21 +0000
commit     779aea46cc3b6b1a62f989ee0c04e68803733ba0 (patch)
tree       4d1dcfabd30aa74d7ff264150aab3706bc9769e6 /gcc/config
parent     a407a750a7578c3c1ab9881b032f008c7be99684 (diff)
[AArch64] Implement vmul<q>_lane<q>_<fsu><16,32,64> intrinsics in C
gcc/
	* config/aarch64/aarch64-simd.md (aarch64_mul3_elt<mode>): New.
	(aarch64_mul3_elt_<vswap_width_name><mode>): Likewise.
	(aarch64_mul3_elt_to_128df): Likewise.
	(aarch64_mul3_elt_to_64v2df): Likewise.
	* config/aarch64/iterators.md (VEL): Also handle DFmode.
	(VMUL): New.
	(VMUL_CHANGE_NLANES): Likewise.
	(h_con): Likewise.
	(f): Likewise.
	* config/aarch64/arm_neon.h (vmul<q>_lane<q>_<suf><16,32,64>): Convert
	to C implementation.

gcc/testsuite/
	* gcc.target/aarch64/mul_intrinsic_1.c: New.
	* gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.

From-SVN: r202624
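
As a quick illustration of what the conversion means for users of arm_neon.h, here is a minimal, hypothetical usage sketch (the wrapper function names are invented for this note; only the intrinsic calls and the instruction shapes come from the patch). With the asm macros replaced by plain C, the new aarch64-simd.md patterns are expected to fold the lane extract back into a single by-element multiply.

/* Hypothetical example, not part of the patch.  */
#include <arm_neon.h>

float32x2_t
scale_by_lane1 (float32x2_t x, float32x2_t coeffs)
{
  /* Plain C now; expected to compile to an "fmul v.2s, v.2s, v.s[1]".  */
  return vmul_lane_f32 (x, coeffs, 1);
}

int16x8_t
scale_by_q_lane7 (int16x8_t x, int16x8_t coeffs)
{
  /* 16-bit elements: the by-element operand is restricted to V0-V15,
     the 'x' constraint introduced in iterators.md below.  */
  return vmulq_laneq_s16 (x, coeffs, 7);
}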
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md |  53
-rw-r--r--  gcc/config/aarch64/arm_neon.h      | 438
-rw-r--r--  gcc/config/aarch64/iterators.md    |  24
3 files changed, 228 insertions, 287 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9805197..04d5794 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -582,6 +582,59 @@
(set_attr "simd_mode" "<MODE>")]
)
+(define_insn "*aarch64_mul3_elt<mode>"
+ [(set (match_operand:VMUL 0 "register_operand" "=w")
+ (mult:VMUL
+ (vec_duplicate:VMUL
+ (vec_select:<VEL>
+ (match_operand:VMUL 1 "register_operand" "<h_con>")
+ (parallel [(match_operand:SI 2 "immediate_operand")])))
+ (match_operand:VMUL 3 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
+ [(set_attr "simd_type" "simd_<f>mul_elt")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
+ [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
+ (mult:VMUL_CHANGE_NLANES
+ (vec_duplicate:VMUL_CHANGE_NLANES
+ (vec_select:<VEL>
+ (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
+ (parallel [(match_operand:SI 2 "immediate_operand")])))
+ (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
+ [(set_attr "simd_type" "simd_<f>mul_elt")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_mul3_elt_to_128df"
+ [(set (match_operand:V2DF 0 "register_operand" "=w")
+ (mult:V2DF
+ (vec_duplicate:V2DF
+ (match_operand:DF 2 "register_operand" "w"))
+ (match_operand:V2DF 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "fmul\\t%0.2d, %1.2d, %2.d[0]"
+ [(set_attr "simd_type" "simd_fmul_elt")
+ (set_attr "simd_mode" "V2DF")]
+)
+
+(define_insn "*aarch64_mul3_elt_to_64v2df"
+ [(set (match_operand:DF 0 "register_operand" "=w")
+ (mult:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand")]))
+ (match_operand:DF 3 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "fmul\\t%0.2d, %3.2d, %1.d[%2]"
+ [(set_attr "simd_type" "simd_fmul_elt")
+ (set_attr "simd_mode" "V2DF")]
+)
+
(define_insn "neg<mode>2"
[(set (match_operand:VDQ 0 "register_operand" "=w")
(neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
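
For orientation (an editorial sketch, not part of the patch): the *aarch64_mul3_elt<mode> pattern above matches a multiply whose first operand is a single lane of a vector broadcast to all lanes. Written out long-hand with standard NEON intrinsics, that combination looks roughly like the following (the wrapper function name is invented):

/* Sketch only: vmul_lane_f32 (a, b, 1) decomposes into select-lane,
   duplicate, multiply -- exactly the RTL shape
   (mult (vec_duplicate (vec_select ...)) ...) that the pattern rewrites
   into one by-element fmul.  */
float32x2_t
mul_lane_longhand (float32x2_t a, float32x2_t b)
{
  float32_t lane  = vget_lane_f32 (b, 1);   /* vec_select      */
  float32x2_t dup = vdup_n_f32 (lane);      /* vec_duplicate   */
  return vmul_f32 (a, dup);                 /* mult            */
}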
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 23b1116..6c9dd79 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9501,136 +9501,6 @@ vmovq_n_u64 (uint64_t a)
return result;
}
-#define vmul_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x2_t b_ = (b); \
- float32x2_t a_ = (a); \
- float32x2_t result; \
- __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_lane_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x4_t b_ = (b); \
- int16x4_t a_ = (a); \
- int16x4_t result; \
- __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_lane_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x2_t b_ = (b); \
- int32x2_t a_ = (a); \
- int32x2_t result; \
- __asm__ ("mul %0.2s,%1.2s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_lane_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x4_t b_ = (b); \
- uint16x4_t a_ = (a); \
- uint16x4_t result; \
- __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_lane_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x2_t b_ = (b); \
- uint32x2_t a_ = (a); \
- uint32x2_t result; \
- __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_laneq_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x2_t a_ = (a); \
- float32x2_t result; \
- __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_laneq_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x8_t b_ = (b); \
- int16x4_t a_ = (a); \
- int16x4_t result; \
- __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_laneq_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x4_t b_ = (b); \
- int32x2_t a_ = (a); \
- int32x2_t result; \
- __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_laneq_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x8_t b_ = (b); \
- uint16x4_t a_ = (a); \
- uint16x4_t result; \
- __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmul_laneq_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x4_t b_ = (b); \
- uint32x2_t a_ = (a); \
- uint32x2_t result; \
- __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
@@ -10149,162 +10019,6 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
return result;
}
-#define vmulq_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x2_t b_ = (b); \
- float32x4_t a_ = (a); \
- float32x4_t result; \
- __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_lane_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x1_t b_ = (b); \
- float64x2_t a_ = (a); \
- float64x2_t result; \
- __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_lane_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x4_t b_ = (b); \
- int16x8_t a_ = (a); \
- int16x8_t result; \
- __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_lane_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x2_t b_ = (b); \
- int32x4_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_lane_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x4_t b_ = (b); \
- uint16x8_t a_ = (a); \
- uint16x8_t result; \
- __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_lane_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x2_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_laneq_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x4_t a_ = (a); \
- float32x4_t result; \
- __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_laneq_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x2_t b_ = (b); \
- float64x2_t a_ = (a); \
- float64x2_t result; \
- __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_laneq_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x8_t b_ = (b); \
- int16x8_t a_ = (a); \
- int16x8_t result; \
- __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_laneq_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x4_t b_ = (b); \
- int32x4_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_laneq_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x8_t b_ = (b); \
- uint16x8_t a_ = (a); \
- uint16x8_t result; \
- __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
- : "=w"(result) \
- : "w"(a_), "x"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vmulq_laneq_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x4_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t a, float32_t b)
{
@@ -21435,6 +21149,158 @@ vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
return a - b * c;
}
+/* vmul_lane */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_f32 (__b, __lane);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
+{
+ return __a * __b;
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_s16 (__b, __lane);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_s32 (__b, __lane);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_u16 (__b, __lane);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_u32 (__b, __lane);
+}
+
+/* vmul_laneq */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
+}
+
+/* vmulq_lane */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_f32 (__b, __lane);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
+{
+ return __a * __b;
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_s16 (__b, __lane);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_s32 (__b, __lane);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_u16 (__b, __lane);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vget_lane_u32 (__b, __lane);
+}
+
+/* vmulq_laneq */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
+{
+ return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
+}
+
/* vqabs */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
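
A brief usage sketch of the _laneq forms added above (hypothetical wrapper name; a sketch rather than part of the patch): the q in _laneq means the lane is taken from a 128-bit vector even though the result is 64 bits wide, so all eight halfword lanes are addressable here.

/* Hypothetical example.  */
uint16x4_t
scale_u16_by_table_lane (uint16x4_t x, uint16x8_t table)
{
  /* Lane index may be 0..7 because the lane source is 128 bits wide;
     this width change is what the aarch64_mul3_elt_<vswap_width_name><mode>
     pattern handles.  */
  return vmul_laneq_u16 (x, table, 7);
}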
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index ffe125b..a6b3117 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -169,6 +169,12 @@
;; Double scalar modes
(define_mode_iterator DX [DI DF])
+;; Modes available for <f>mul lane operations.
+(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
+
+;; Modes available for <f>mul lane operations changing lane count.
+(define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF])
+
;; ------------------------------------------------------------------
;; Unspec enumerations for Advance SIMD. These could well go into
;; aarch64.md but for their use in int_iterators here.
@@ -358,7 +364,7 @@
(V2SI "SI") (V4SI "SI")
(DI "DI") (V2DI "DI")
(V2SF "SF") (V4SF "SF")
- (V2DF "DF")
+ (V2DF "DF") (DF "DF")
(SI "SI") (HI "HI")
(QI "QI")])
@@ -541,6 +547,22 @@
(V2SF "to_128") (V4SF "to_64")
(DF "to_128") (V2DF "to_64")])
+;; For certain vector-by-element multiplication instructions we must
+;; constrain the HI cases to use only V0-V15. This is covered by
+;; the 'x' constraint. All other modes may use the 'w' constraint.
+(define_mode_attr h_con [(V2SI "w") (V4SI "w")
+ (V4HI "x") (V8HI "x")
+ (V2SF "w") (V4SF "w")
+ (V2DF "w") (DF "w")])
+
+;; Defined to 'f' for types whose element type is a float type.
+(define_mode_attr f [(V8QI "") (V16QI "")
+ (V4HI "") (V8HI "")
+ (V2SI "") (V4SI "")
+ (DI "") (V2DI "")
+ (V2SF "f") (V4SF "f")
+ (V2DF "f") (DF "f")])
+
;; -------------------------------------------------------------------
;; Code Iterators
;; -------------------------------------------------------------------
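
Finally, a sketch of the two DFmode corner cases that the new VEL entry and the to_128df/to_64v2df patterns exist for (wrapper function names invented; the intrinsic signatures are the ones added to arm_neon.h above):

/* Hypothetical examples.  */
float64x2_t
broadcast_mul_2d (float64x2_t x, float64x1_t s)
{
  /* 64-bit lane source, 128-bit result: expected to be caught by
     *aarch64_mul3_elt_to_128df, i.e. a single "fmul ..., %2.d[0]".  */
  return vmulq_lane_f64 (x, s, 0);
}

float64x1_t
pick_mul_1d (float64x1_t x, float64x2_t s)
{
  /* 128-bit lane source, 64-bit result: expected to be caught by
     *aarch64_mul3_elt_to_64v2df.  */
  return vmul_laneq_f64 (x, s, 1);
}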