| author | James Greenhalgh <james.greenhalgh@arm.com> | 2013-08-09 09:28:51 +0000 |
|---|---|---|
| committer | James Greenhalgh <jgreenhalgh@gcc.gnu.org> | 2013-08-09 09:28:51 +0000 |
| commit | 66adb8eb440d94f72f9973f63b1aac722eb1201d (patch) | |
| tree | 443565ab356338f6e52228994fadec8e90fe9233 /gcc | |
| parent | 23a6cb7838f73aca404e5cc25a1cfbe1064db068 (diff) | |
[AArch64] Fixup the vget_lane RTL patterns and intrinsics
gcc/
* config/aarch64/aarch64-simd-builtins.def (get_lane_signed): Remove.
(get_lane_unsigned): Likewise.
(dup_lane_scalar): Likewise.
(get_lane): enable for VALL.
* config/aarch64/aarch64-simd.md
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_get_lane_signed<mode>): Likewise.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): New.
(aarch64_get_lane_zero_extendsi<mode>): Likewise.
(aarch64_get_lane<mode>): Enable for all vector modes.
(aarch64_get_lanedi): Remove misleading constraints.
* config/aarch64/arm_neon.h
(__aarch64_vget_lane_any): Define.
(__aarch64_vget<q>_lane_<fpsu><8,16,32,64>): Likewise.
(vget<q>_lane_<fpsu><8,16,32,64>): Use __aarch64_vget_lane macros.
(vdup<bhsd>_lane_<su><8,16,32,64>): Likewise.
* config/aarch64/iterators.md (VDQQH): New.
(VDQQHS): Likewise.
(vwcore): Likewise.
gcc/testsuite/
* gcc.target/aarch64/scalar_intrinsics.c: Update expected
output of vdup intrinsics.
From-SVN: r201624
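For context, here is a small usage sketch (not part of the commit; function names are hypothetical) of the intrinsics whose expansion this patch reworks. The instruction noted for each extraction is what the new patterns are expected to select with an AArch64 GCC at `-O2`; actual output can vary with surrounding code.

```c
/* Illustrative only -- compile with an AArch64 GCC and inspect the assembly.  */
#include <arm_neon.h>

/* Sign-extending lane read: expected to match the new
   *aarch64_get_lane_extend pattern and emit "smov w0, v0.h[2]".  */
int32_t
lane_s16_to_int (int16x4_t v)
{
  return vget_lane_s16 (v, 2);
}

/* Zero-extending lane read: expected to match
   *aarch64_get_lane_zero_extendsi and emit "umov w0, v0.b[5]".  */
uint32_t
lane_u8_to_uint (uint8x16_t v)
{
  return vgetq_lane_u8 (v, 5);
}

/* Lane read whose result stays in a SIMD register: expected to use the
   "dup" alternative of aarch64_get_lane<mode>, e.g. "dup s0, v0.s[1]".  */
float32_t
lane_f32 (float32x2_t v)
{
  return vget_lane_f32 (v, 1);
}
```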
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 23
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 8
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 57
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 249
-rw-r--r-- | gcc/config/aarch64/iterators.md | 15
-rw-r--r-- | gcc/testsuite/ChangeLog | 5
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c | 8
7 files changed, 236 insertions, 129 deletions
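Before the full diff: the bulk of the arm_neon.h change below funnels every lane read through a single token-pasting helper, `__aarch64_vget_lane_any`, which builds the per-mode builtin name and applies optional result and argument casts. The following self-contained sketch mirrors that mechanism; the `get_lanev*` helpers here are stand-ins for the real `__builtin_aarch64_get_lane<mode>` builtins, and the `my_vget_lane_*` names are illustrative only.

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the per-mode builtins the real header dispatches to.  */
static int16_t get_lanev4hi (const int16_t *v, int lane) { return v[lane]; }
static int32_t get_lanev2si (const int32_t *v, int lane) { return v[lane]; }

/* Same shape as __aarch64_vget_lane_any: paste the mode suffix onto the
   callee name, then apply an optional result cast and argument cast.  */
#define vget_lane_any(size, cast_ret, cast_a, a, b) \
  (cast_ret get_lane##size (cast_a (a), (b)))

/* Per-type wrappers, mirroring __aarch64_vget_lane_u16 / _s32.  */
#define my_vget_lane_u16(a, b) vget_lane_any (v4hi, (uint16_t), , a, b)
#define my_vget_lane_s32(a, b) vget_lane_any (v2si, , , a, b)

int
main (void)
{
  int16_t h[4] = { -1, 2, -3, 4 };
  int32_t s[2] = { 100, -200 };

  /* -1 read back through the uint16_t result cast prints as 65535.  */
  printf ("%d %d\n", my_vget_lane_u16 (h, 0), my_vget_lane_s32 (s, 1));
  return 0;
}
```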
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8ad53a5..8d80204 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2013-08-09  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def (get_lane_signed): Remove.
+	(get_lane_unsigned): Likewise.
+	(dup_lane_scalar): Likewise.
+	(get_lane): enable for VALL.
+	* config/aarch64/aarch64-simd.md
+	(aarch64_dup_lane_scalar<mode>): Remove.
+	(aarch64_get_lane_signed<mode>): Likewise.
+	(aarch64_get_lane_unsigned<mode>): Likewise.
+	(aarch64_get_lane_extend<GPI:mode><VDQQH:mode>): New.
+	(aarch64_get_lane_zero_extendsi<mode>): Likewise.
+	(aarch64_get_lane<mode>): Enable for all vector modes.
+	(aarch64_get_lanedi): Remove misleading constraints.
+	* config/aarch64/arm_neon.h
+	(__aarch64_vget_lane_any): Define.
+	(__aarch64_vget<q>_lane_<fpsu><8,16,32,64>): Likewise.
+	(vget<q>_lane_<fpsu><8,16,32,64>): Use __aarch64_vget_lane macros.
+	(vdup<bhsd>_lane_<su><8,16,32,64>): Likewise.
+	* config/aarch64/iterators.md (VDQQH): New.
+	(VDQQHS): Likewise.
+	(vwcore): Likewise.
+
 2013-08-09  Eric Botcazou  <ebotcazou@adacore.com>

 	* configure.ac: Add GAS check for LEON instructions on SPARC.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 55dead6..4046d7a 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -40,10 +40,6 @@
    10 - CODE_FOR_<name><mode>.  */

   BUILTIN_VD_RE (CREATE, create, 0)
-  BUILTIN_VQ_S (GETLANE, get_lane_signed, 0)
-  BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0)
-  BUILTIN_VDQF (GETLANE, get_lane, 0)
-  VAR1 (GETLANE, get_lane, 0, di)
   BUILTIN_VDC (COMBINE, combine, 0)
   BUILTIN_VB (BINOP, pmul, 0)
   BUILTIN_VDQF (UNOP, sqrt, 2)
@@ -51,6 +47,9 @@
   VAR1 (UNOP, addp, 0, di)
   VAR1 (UNOP, clz, 2, v4si)

+  BUILTIN_VALL (GETLANE, get_lane, 0)
+  VAR1 (GETLANE, get_lane, 0, di)
+
   BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
   BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
   BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
@@ -64,7 +63,6 @@
   BUILTIN_VQ (REINTERP, reinterpretv2df, 0)

   BUILTIN_VDQ_I (BINOP, dup_lane, 0)
-  BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0)
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
   BUILTIN_VSDQ_I (BINOP, sqshl, 0)
   BUILTIN_VSDQ_I (BINOP, uqshl, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3c76032..9823730 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -357,20 +357,6 @@
    (set_attr "simd_mode" "<MODE>")]
 )

-(define_insn "aarch64_dup_lane_scalar<mode>"
-  [(set (match_operand:<VEL> 0 "register_operand" "=w, r")
-	(vec_select:<VEL>
-	  (match_operand:VDQ 1 "register_operand" "w, w")
-	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])
-	  ))]
-  "TARGET_SIMD"
-  "@
-   dup\\t%<Vetype>0, %1.<Vetype>[%2]
-   umov\\t%<vw>0, %1.<Vetype>[%2]"
-  [(set_attr "simd_type" "simd_dup, simd_movgp")
-   (set_attr "simd_mode" "<MODE>")]
-)
-
 (define_insn "aarch64_simd_dup<mode>"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
	(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
@@ -2147,45 +2133,50 @@
   DONE;
 })

-(define_insn "aarch64_get_lane_signed<mode>"
-  [(set (match_operand:<VEL> 0 "register_operand" "=r")
-	(sign_extend:<VEL>
+;; Lane extraction with sign extension to general purpose register.
+(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(sign_extend:GPI
 	  (vec_select:<VEL>
-	    (match_operand:VQ_S 1 "register_operand" "w")
+	    (match_operand:VDQQH 1 "register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
-  "smov\\t%0, %1.<Vetype>[%2]"
+  "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"
   [(set_attr "simd_type" "simd_movgp")
-   (set_attr "simd_mode" "<MODE>")]
+   (set_attr "simd_mode" "<VDQQH:MODE>")]
 )

-(define_insn "aarch64_get_lane_unsigned<mode>"
-  [(set (match_operand:<VEL> 0 "register_operand" "=r")
-	(zero_extend:<VEL>
+(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extend:SI
 	  (vec_select:<VEL>
-	    (match_operand:VDQ 1 "register_operand" "w")
+	    (match_operand:VDQQH 1 "register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_SIMD"
-  "umov\\t%<vw>0, %1.<Vetype>[%2]"
+  "umov\\t%w0, %1.<Vetype>[%2]"
   [(set_attr "simd_type" "simd_movgp")
    (set_attr "simd_mode" "<MODE>")]
 )

+;; Lane extraction of a value, neither sign nor zero extension
+;; is guaranteed so upper bits should be considered undefined.
 (define_insn "aarch64_get_lane<mode>"
-  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
	(vec_select:<VEL>
-	    (match_operand:VDQF 1 "register_operand" "w")
-	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+	    (match_operand:VALL 1 "register_operand" "w, w")
+	    (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))]
   "TARGET_SIMD"
-  "mov\\t%0.<Vetype>[0], %1.<Vetype>[%2]"
-  [(set_attr "simd_type" "simd_ins")
+  "@
+   umov\\t%<vwcore>0, %1.<Vetype>[%2]
+   dup\\t%<Vetype>0, %1.<Vetype>[%2]"
+  [(set_attr "simd_type" "simd_movgp, simd_dup")
    (set_attr "simd_mode" "<MODE>")]
 )

 (define_expand "aarch64_get_lanedi"
-  [(match_operand:DI 0 "register_operand" "=r")
-   (match_operand:DI 1 "register_operand" "w")
-   (match_operand:SI 2 "immediate_operand" "i")]
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
   "TARGET_SIMD"
 {
   aarch64_simd_lane_bounds (operands[2], 0, 1);
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 99cf123..73a5400 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -446,7 +446,66 @@ typedef struct poly16x8x4_t
   poly16x8_t val[4];
 } poly16x8x4_t;

-
+/* vget_lane internal macros.  */
+
+#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
+  (__cast_ret \
+     __builtin_aarch64_get_lane##__size (__cast_a __a, __b))
+
+#define __aarch64_vget_lane_f32(__a, __b) \
+  __aarch64_vget_lane_any (v2sf, , , __a, __b)
+#define __aarch64_vget_lane_f64(__a, __b) (__a)
+
+#define __aarch64_vget_lane_p8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
+#define __aarch64_vget_lane_p16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)
+
+#define __aarch64_vget_lane_s8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
+#define __aarch64_vget_lane_s16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
+#define __aarch64_vget_lane_s32(__a, __b) \
+  __aarch64_vget_lane_any (v2si, , ,__a, __b)
+#define __aarch64_vget_lane_s64(__a, __b) (__a)
+
+#define __aarch64_vget_lane_u8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
+#define __aarch64_vget_lane_u16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
+#define __aarch64_vget_lane_u32(__a, __b) \
+  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
+#define __aarch64_vget_lane_u64(__a, __b) (__a)
+
+#define __aarch64_vgetq_lane_f32(__a, __b) \
+  __aarch64_vget_lane_any (v4sf, , , __a, __b)
+#define __aarch64_vgetq_lane_f64(__a, __b) \
+  __aarch64_vget_lane_any (v2df, , , __a, __b)
+
+#define __aarch64_vgetq_lane_p8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
+#define __aarch64_vgetq_lane_p16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)
+
+#define __aarch64_vgetq_lane_s8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
+#define __aarch64_vgetq_lane_s16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
+#define __aarch64_vgetq_lane_s32(__a, __b) \
+  __aarch64_vget_lane_any (v4si, , ,__a, __b)
+#define __aarch64_vgetq_lane_s64(__a, __b) \
+  __aarch64_vget_lane_any (v2di, , ,__a, __b)
+
+#define __aarch64_vgetq_lane_u8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
+#define __aarch64_vgetq_lane_u16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
+#define __aarch64_vgetq_lane_u32(__a, __b) \
+  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
+#define __aarch64_vgetq_lane_u64(__a, __b) \
+  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
+
+/* vadd */
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vadd_s8 (int8x8_t __a, int8x8_t __b)
 {
@@ -2307,155 +2366,156 @@ vcreate_p16 (uint64_t __a)
   return (poly16x4_t) __a;
 }

+/* vget_lane */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vget_lane_f32 (float32x2_t __a, const int __b)
+{
+  return __aarch64_vget_lane_f32 (__a, __b);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vget_lane_f64 (float64x1_t __a, const int __b)
+{
+  return __aarch64_vget_lane_f64 (__a, __b);
+}
+
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vget_lane_p8 (poly8x8_t __a, const int __b)
+{
+  return __aarch64_vget_lane_p8 (__a, __b);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vget_lane_p16 (poly16x4_t __a, const int __b)
+{
+  return __aarch64_vget_lane_p16 (__a, __b);
+}
+
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vget_lane_s8 (int8x8_t __a, const int __b)
 {
-  return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b);
+  return __aarch64_vget_lane_s8 (__a, __b);
 }

 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vget_lane_s16 (int16x4_t __a, const int __b)
 {
-  return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b);
+  return __aarch64_vget_lane_s16 (__a, __b);
 }

 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vget_lane_s32 (int32x2_t __a, const int __b)
 {
-  return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b);
+  return __aarch64_vget_lane_s32 (__a, __b);
 }

-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vget_lane_f32 (float32x2_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vget_lane_s64 (int64x1_t __a, const int __b)
 {
-  return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b);
+  return __aarch64_vget_lane_s64 (__a, __b);
 }

 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vget_lane_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
-						    __b);
+  return __aarch64_vget_lane_u8 (__a, __b);
 }

 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vget_lane_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
-						     __b);
+  return __aarch64_vget_lane_u16 (__a, __b);
 }

 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vget_lane_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a,
-						     __b);
+  return __aarch64_vget_lane_u32 (__a, __b);
 }

-__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
-vget_lane_p8 (poly8x8_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vget_lane_u64 (uint64x1_t __a, const int __b)
 {
-  return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
-						    __b);
+  return __aarch64_vget_lane_u64 (__a, __b);
 }

-__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
-vget_lane_p16 (poly16x4_t __a, const int __b)
+/* vgetq_lane */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vgetq_lane_f32 (float32x4_t __a, const int __b)
 {
-  return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
-						     __b);
+  return __aarch64_vgetq_lane_f32 (__a, __b);
 }

-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vget_lane_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vgetq_lane_f64 (float64x2_t __a, const int __b)
 {
-  return (int64_t) __builtin_aarch64_get_lanedi (__a, __b);
+  return __aarch64_vgetq_lane_f64 (__a, __b);
 }

-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
-vget_lane_u64 (uint64x1_t __a, const int __b)
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vgetq_lane_p8 (poly8x16_t __a, const int __b)
+{
+  return __aarch64_vgetq_lane_p8 (__a, __b);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vgetq_lane_p16 (poly16x8_t __a, const int __b)
 {
-  return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b);
+  return __aarch64_vgetq_lane_p16 (__a, __b);
 }

 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vgetq_lane_s8 (int8x16_t __a, const int __b)
 {
-  return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b);
+  return __aarch64_vgetq_lane_s8 (__a, __b);
 }

 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vgetq_lane_s16 (int16x8_t __a, const int __b)
 {
-  return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b);
+  return __aarch64_vgetq_lane_s16 (__a, __b);
 }

 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vgetq_lane_s32 (int32x4_t __a, const int __b)
 {
-  return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b);
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vgetq_lane_f32 (float32x4_t __a, const int __b)
-{
-  return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b);
+  return __aarch64_vgetq_lane_s32 (__a, __b);
 }

-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vgetq_lane_f64 (float64x2_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vgetq_lane_s64 (int64x2_t __a, const int __b)
 {
-  return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b);
+  return __aarch64_vgetq_lane_s64 (__a, __b);
 }

 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vgetq_lane_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
-						     __b);
+  return __aarch64_vgetq_lane_u8 (__a, __b);
 }

 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vgetq_lane_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
-						      __b);
+  return __aarch64_vgetq_lane_u16 (__a, __b);
 }

 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vgetq_lane_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a,
-						      __b);
-}
-
-__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
-vgetq_lane_p8 (poly8x16_t __a, const int __b)
-{
-  return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
-						     __b);
-}
-
-__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
-vgetq_lane_p16 (poly16x8_t __a, const int __b)
-{
-  return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
-						      __b);
-}
-
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vgetq_lane_s64 (int64x2_t __a, const int __b)
-{
-  return __builtin_aarch64_get_lane_unsignedv2di (__a, __b);
+  return __aarch64_vgetq_lane_u32 (__a, __b);
 }

 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vgetq_lane_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a,
-						      __b);
+  return __aarch64_vgetq_lane_u64 (__a, __b);
 }

+/* vreinterpret */
+
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_s8 (int8x8_t __a)
 {
@@ -6724,18 +6784,6 @@ vget_high_u64 (uint64x2_t a)
   return result;
 }

-#define vget_lane_f64(a, b) \
-  __extension__ \
-  ({ \
-     float64x1_t a_ = (a); \
-     float64_t result; \
-     __asm__ ("umov %x0, %1.d[%2]" \
-	      : "=r"(result) \
-	      : "w"(a_), "i"(b) \
-	      : /* No clobbers */); \
-     result; \
-   })
-
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vget_low_f32 (float32x4_t a)
 {
@@ -19732,49 +19780,49 @@ vcvtpq_u64_f64 (float64x2_t __a)
 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
 vdupb_lane_s8 (int8x16_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv16qi (a, b);
+  return __aarch64_vget_laneq_s8 (a, b);
 }

 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
 vdupb_lane_u8 (uint8x16_t a, int const b)
 {
-  return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b);
+  return __aarch64_vget_laneq_u8 (a, b);
 }

 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
 vduph_lane_s16 (int16x8_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv8hi (a, b);
+  return __aarch64_vget_laneq_s16 (a, b);
 }

 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
 vduph_lane_u16 (uint16x8_t a, int const b)
 {
-  return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b);
+  return __aarch64_vget_laneq_u16 (a, b);
 }

 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
 vdups_lane_s32 (int32x4_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv4si (a, b);
+  return __aarch64_vget_laneq_s32 (a, b);
 }

 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
 vdups_lane_u32 (uint32x4_t a, int const b)
 {
-  return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b);
+  return __aarch64_vget_laneq_u32 (a, b);
 }

 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vdupd_lane_s64 (int64x2_t a, int const b)
 {
-  return __builtin_aarch64_dup_lane_scalarv2di (a, b);
+  return __aarch64_vget_laneq_s64 (a, b);
 }

 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vdupd_lane_u64 (uint64x2_t a, int const b)
 {
-  return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
+  return __aarch64_vget_laneq_s64 (a, b);
 }

 /* vld1 */
@@ -25581,4 +25629,31 @@ __INTERLEAVE_LIST (zip)

 /* End of optimal implementations in approved order.  */

+#undef __aarch64_vget_lane_any
+#undef __aarch64_vget_lane_f32
+#undef __aarch64_vget_lane_f64
+#undef __aarch64_vget_lane_p8
+#undef __aarch64_vget_lane_p16
+#undef __aarch64_vget_lane_s8
+#undef __aarch64_vget_lane_s16
+#undef __aarch64_vget_lane_s32
+#undef __aarch64_vget_lane_s64
+#undef __aarch64_vget_lane_u8
+#undef __aarch64_vget_lane_u16
+#undef __aarch64_vget_lane_u32
+#undef __aarch64_vget_lane_u64
+
+#undef __aarch64_vgetq_lane_f32
+#undef __aarch64_vgetq_lane_f64
+#undef __aarch64_vgetq_lane_p8
+#undef __aarch64_vgetq_lane_p16
+#undef __aarch64_vgetq_lane_s8
+#undef __aarch64_vgetq_lane_s16
+#undef __aarch64_vgetq_lane_s32
+#undef __aarch64_vgetq_lane_s64
+#undef __aarch64_vgetq_lane_u8
+#undef __aarch64_vgetq_lane_u16
+#undef __aarch64_vgetq_lane_u32
+#undef __aarch64_vgetq_lane_u64
+
 #endif
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3ec889f..37b6cbc 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -134,9 +134,15 @@
 ;; Vector modes except double int.
 (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])

+;; Vector modes for Q and H types.
+(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
+
 ;; Vector modes for H and S types.
 (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])

+;; Vector modes for Q, H and S types.
+(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
+
 ;; Vector and scalar integer modes for H and S
 (define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI])

@@ -453,6 +459,15 @@
 			 (V2SF "s") (V4SF "s") (V2DF "d")])

+;; Corresponding core element mode for each vector mode.  This is a
+;; variation on <vw> mapping FP modes to GP regs.
+(define_mode_attr vwcore [(V8QI "w") (V16QI "w")
+			  (V4HI "w") (V8HI "w")
+			  (V2SI "w") (V4SI "w")
+			  (DI "x") (V2DI "x")
+			  (V2SF "w") (V4SF "w")
+			  (V2DF "x")])
+
 ;; Double vector types for ALLX.
 (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a777c7d..1e682b9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-08-09  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/scalar_intrinsics.c: Update expected
+	output of vdup intrinsics.
+
 2013-08-09  Zhenqiang Chen  <zhenqiang.chen@linaro.org>

 	* gcc.target/arm/lp1189445.c: New testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index 3d902f6..d84bfeb 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -193,7 +193,7 @@ test_vcltzd_s64 (int64x1_t a)
   return res;
 }

-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv16qi" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev16qi" 2 } } */

 int8x1_t
 test_vdupb_lane_s8 (int8x16_t a)
@@ -207,7 +207,7 @@ test_vdupb_lane_u8 (uint8x16_t a)
   return vdupb_lane_u8 (a, 2);
 }

-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv8hi" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */

 int16x1_t
 test_vduph_lane_s16 (int16x8_t a)
@@ -221,7 +221,7 @@ test_vduph_lane_u16 (uint16x8_t a)
   return vduph_lane_u16 (a, 2);
 }

-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv4si" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */

 int32x1_t
 test_vdups_lane_s32 (int32x4_t a)
@@ -235,7 +235,7 @@ test_vdups_lane_u32 (uint32x4_t a)
   return vdups_lane_u32 (a, 2);
 }

-/* { dg-final { scan-assembler-times "aarch64_dup_lane_scalarv2di" 2 } } */
+/* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */

 int64x1_t
 test_vdupd_lane_s64 (int64x2_t a)
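The testsuite change above tracks the rename of the pattern the scalar vdup-lane intrinsics now expand through (aarch64_get_lane<mode> instead of the removed aarch64_dup_lane_scalar<mode>). A usage sketch in the style of the updated test follows; it is illustrative only, and the scalar int8x1_t/uint16x1_t result types are taken from the arm_neon.h of this era.

```c
#include <arm_neon.h>

/* Each call expands through aarch64_get_lane<mode> after this patch;
   depending on where the result is needed, that pattern emits either a
   umov to a general register or a dup to a SIMD scalar register.  */
int8x1_t
dup_b2 (int8x16_t v)
{
  return vdupb_lane_s8 (v, 2);
}

uint16x1_t
dup_h3 (uint16x8_t v)
{
  return vduph_lane_u16 (v, 3);
}
```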