aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSandra Loosemore <sandra@codesourcery.com>2010-07-02 20:31:43 -0400
committerSandra Loosemore <sandra@gcc.gnu.org>2010-07-02 20:31:43 -0400
commitbab53516d0a58120f1d3b24aabc09a3d7e6443aa (patch)
tree716708677aed26e5c65cf3ff167cec3906175513
parentb614e5669ebff10b7ec9b5fff4e539ab1416e38b (diff)
downloadgcc-bab53516d0a58120f1d3b24aabc09a3d7e6443aa.zip
gcc-bab53516d0a58120f1d3b24aabc09a3d7e6443aa.tar.gz
gcc-bab53516d0a58120f1d3b24aabc09a3d7e6443aa.tar.bz2
neon.md (UNSPEC_VABA): Delete.
2010-07-02 Sandra Loosemore <sandra@codesourcery.com> Julian Brown <julian@codesourcery.com> gcc/ * config/arm/neon.md (UNSPEC_VABA): Delete. (UNSPEC_VABAL): Delete. (UNSPEC_VABS): Delete. (UNSPEC_VMUL_N): Delete. (adddi3_neon): New. (subdi3_neon): New. (mul<mode>3add<mode>_neon): Make the pattern named. (mul<mode>3neg<mode>add<mode>_neon): Likewise. (neon_vadd<mode>): Replace with define_expand, and move the remaining unspec parts... (neon_vadd<mode>_unspec): ...to this. (neon_vmla<mode>, neon_vmla<mode>_unspec): Likewise. (neon_vlms<mode>, neon_vmls<mode>_unspec): Likewise. (neon_vsub<mode>, neon_vsub<mode>_unspec): Likewise. (neon_vaba<mode>): Rewrite in terms of vabd. (neon_vabal<mode>): Rewrite in terms of vabdl. (neon_vabs<mode>): Rewrite without unspec. * config/arm/arm.md (*arm_adddi3): Disable for TARGET_NEON. (*arm_subdi3): Likewise. * config/arm/neon.ml (Vadd, Vsub): Split out 64-bit variants and add No_op attribute to disable assembly output checks. * config/arm/arm_neon.h: Regenerated. * doc/arm-neon-intrinsics.texi: Regenerated. gcc/testsuite/ * gcc.target/arm/neon/vadds64.c: Regenerated. * gcc.target/arm/neon/vaddu64.c: Regenerated. * gcc.target/arm/neon/vsubs64.c: Regenerated. * gcc.target/arm/neon/vsubu64.c: Regenerated. * gcc.target/arm/neon-vmla-1.c: Add -ffast-math to options. * gcc.target/arm/neon-vmls-1.c: Likewise. * gcc.target/arm/neon-vsubs64.c: New execution test. * gcc.target/arm/neon-vsubu64.c: New execution test. * gcc.target/arm/neon-vadds64.c: New execution test. * gcc.target/arm/neon-vaddu64.c: New execution test. Co-Authored-By: Julian Brown <julian@codesourcery.com> From-SVN: r161762
-rw-r--r--gcc/ChangeLog27
-rw-r--r--gcc/config/arm/arm.md7
-rw-r--r--gcc/config/arm/arm_neon.h24
-rw-r--r--gcc/config/arm/neon.md219
-rw-r--r--gcc/config/arm/neon.ml6
-rw-r--r--gcc/doc/arm-neon-intrinsics.texi20
-rw-r--r--gcc/testsuite/ChangeLog14
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vadds64.c21
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vaddu64.c21
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vmla-1.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vmls-1.c2
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vsubs64.c21
-rw-r--r--gcc/testsuite/gcc.target/arm/neon-vsubu64.c21
-rw-r--r--gcc/testsuite/gcc.target/arm/neon/vadds64.c1
-rw-r--r--gcc/testsuite/gcc.target/arm/neon/vaddu64.c1
-rw-r--r--gcc/testsuite/gcc.target/arm/neon/vsubs64.c1
-rw-r--r--gcc/testsuite/gcc.target/arm/neon/vsubu64.c1
17 files changed, 318 insertions, 91 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 529d635..3943f8f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2010-07-02 Sandra Loosemore <sandra@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * config/arm/neon.md (UNSPEC_VABA): Delete.
+ (UNSPEC_VABAL): Delete.
+ (UNSPEC_VABS): Delete.
+ (UNSPEC_VMUL_N): Delete.
+ (adddi3_neon): New.
+ (subdi3_neon): New.
+ (mul<mode>3add<mode>_neon): Make the pattern named.
+ (mul<mode>3neg<mode>add<mode>_neon): Likewise.
+ (neon_vadd<mode>): Replace with define_expand, and move the remaining
+ unspec parts...
+ (neon_vadd<mode>_unspec): ...to this.
+ (neon_vmla<mode>, neon_vmla<mode>_unspec): Likewise.
+ (neon_vlms<mode>, neon_vmls<mode>_unspec): Likewise.
+ (neon_vsub<mode>, neon_vsub<mode>_unspec): Likewise.
+ (neon_vaba<mode>): Rewrite in terms of vabd.
+ (neon_vabal<mode>): Rewrite in terms of vabdl.
+ (neon_vabs<mode>): Rewrite without unspec.
+ * config/arm/arm.md (*arm_adddi3): Disable for TARGET_NEON.
+ (*arm_subdi3): Likewise.
+ * config/arm/neon.ml (Vadd, Vsub): Split out 64-bit variants and add
+ No_op attribute to disable assembly output checks.
+ * config/arm/arm_neon.h: Regenerated.
+ * doc/arm-neon-intrinsics.texi: Regenerated.
+
2010-07-02 Jan Hubicka <jh@suse.cz>
* ipa-split.c (split_function): For aggregate values set return_slot_opt;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f490966..fbbe7f4 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -495,9 +495,10 @@
(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
(match_operand:DI 2 "s_register_operand" "r, 0")))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)"
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
"#"
- "TARGET_32BIT && reload_completed"
+ "TARGET_32BIT && reload_completed
+ && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))"
[(parallel [(set (reg:CC_C CC_REGNUM)
(compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
(match_dup 1)))
@@ -994,7 +995,7 @@
(minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
(match_operand:DI 2 "s_register_operand" "r,0,0")))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT"
+ "TARGET_32BIT && !TARGET_NEON"
"subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
[(set_attr "conds" "clob")
(set_attr "length" "8")]
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index a6085bb..9cba0a9 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -414,12 +414,6 @@ vadd_s32 (int32x2_t __a, int32x2_t __b)
return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1);
}
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vadd_s64 (int64x1_t __a, int64x1_t __b)
-{
- return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1);
-}
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
@@ -444,6 +438,12 @@ vadd_u32 (uint32x2_t __a, uint32x2_t __b)
return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
}
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vadd_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1);
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
@@ -1368,12 +1368,6 @@ vsub_s32 (int32x2_t __a, int32x2_t __b)
return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1);
}
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vsub_s64 (int64x1_t __a, int64x1_t __b)
-{
- return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1);
-}
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
@@ -1398,6 +1392,12 @@ vsub_u32 (uint32x2_t __a, uint32x2_t __b)
return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
}
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsub_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1);
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 1b25842..48965ca 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -22,11 +22,8 @@
(define_constants
[(UNSPEC_ASHIFT_SIGNED 65)
(UNSPEC_ASHIFT_UNSIGNED 66)
- (UNSPEC_VABA 67)
- (UNSPEC_VABAL 68)
(UNSPEC_VABD 69)
(UNSPEC_VABDL 70)
- (UNSPEC_VABS 71)
(UNSPEC_VADD 72)
(UNSPEC_VADDHN 73)
(UNSPEC_VADDL 74)
@@ -75,7 +72,6 @@
(UNSPEC_VMULL 128)
(UNSPEC_VMUL_LANE 129)
(UNSPEC_VMULL_LANE 130)
- (UNSPEC_VMUL_N 131)
(UNSPEC_VPADAL 135)
(UNSPEC_VPADD 136)
(UNSPEC_VPADDL 137)
@@ -816,11 +812,8 @@
;; Doubleword and quadword arithmetic.
-;; NOTE: vadd/vsub and some other instructions also support 64-bit integer
-;; element size, which we could potentially use for "long long" operations. We
-;; don't want to do this at present though, because moving values from the
-;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is
-;; easy to do with ARM instructions anyway.
+;; NOTE: some other instructions also support 64-bit integer
+;; element size, which we could potentially use for "long long" operations.
(define_insn "*add<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
@@ -836,6 +829,26 @@
(const_string "neon_int_1")))]
)
+(define_insn "adddi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0")
+ (match_operand:DI 2 "s_register_operand" "w,r,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vadd.i64\t%P0, %P1, %P2";
+ case 1: return "#";
+ case 2: return "#";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "conds" "*,clob,clob")
+ (set_attr "length" "*,8,8")]
+)
+
(define_insn "*sub<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
@@ -850,6 +863,27 @@
(const_string "neon_int_2")))]
)
+(define_insn "subdi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0")
+ (match_operand:DI 2 "s_register_operand" "w,r,0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vsub.i64\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 2: /* fall through */
+ case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_2,*,*,*")
+ (set_attr "conds" "*,clob,clob,clob")
+ (set_attr "length" "*,8,8,8")]
+)
+
(define_insn "*mul<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
@@ -871,7 +905,7 @@
(const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
)
-(define_insn "*mul<mode>3add<mode>_neon"
+(define_insn "mul<mode>3add<mode>_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
(match_operand:VDQ 3 "s_register_operand" "w"))
@@ -893,7 +927,7 @@
(const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
)
-(define_insn "*mul<mode>3neg<mode>add<mode>_neon"
+(define_insn "mul<mode>3neg<mode>add<mode>_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0")
(mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
@@ -1718,11 +1752,37 @@
; good for plain vadd, vaddq.
-(define_insn "neon_vadd<mode>"
+(define_expand "neon_vadd<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "=w")
+ (match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Note that NEON operations don't support the full IEEE 754 standard: in
+; particular, denormal values are flushed to zero. This means that GCC cannot
+; use those instructions for autovectorization, etc. unless
+; -funsafe-math-optimizations is in effect (in which case flush-to-zero
+; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
+; header) must work in either case: if -funsafe-math-optimizations is given,
+; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
+; expand to unspecs (which may potentially limit the extent to which they might
+; be optimized by generic code).
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vadd<mode>_unspec"
[(set (match_operand:VDQX 0 "s_register_operand" "=w")
(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
- (match_operand:VDQX 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
+ (match_operand:VDQX 2 "s_register_operand" "w")]
UNSPEC_VADD))]
"TARGET_NEON"
"vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
@@ -1795,6 +1855,8 @@
[(set_attr "neon_type" "neon_int_4")]
)
+;; We cannot replace this unspec with mul<mode>3 because of the odd
+;; polynomial multiplication case that can specified by operand 3.
(define_insn "neon_vmul<mode>"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
@@ -1818,13 +1880,31 @@
(const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
)
-(define_insn "neon_vmla<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:VDQW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VMLA))]
+(define_expand "neon_vmla<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:VDQW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vmla<mode>_unspec"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0")
+ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w")]
+ UNSPEC_VMLA))]
"TARGET_NEON"
"vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
@@ -1857,13 +1937,31 @@
(const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
)
-(define_insn "neon_vmls<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:VDQW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VMLS))]
+(define_expand "neon_vmls<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:VDQW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
+ operands[1], operands[2], operands[3]));
+ else
+ emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vmls<mode>_unspec"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0")
+ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w")]
+ UNSPEC_VMLS))]
"TARGET_NEON"
"vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
@@ -1973,11 +2071,27 @@
(const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
)
-(define_insn "neon_vsub<mode>"
+(define_expand "neon_vsub<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "=w")
+ (match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vsub<mode>_unspec"
[(set (match_operand:VDQX 0 "s_register_operand" "=w")
(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
- (match_operand:VDQX 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
+ (match_operand:VDQX 2 "s_register_operand" "w")]
UNSPEC_VSUB))]
"TARGET_NEON"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
@@ -2160,11 +2274,11 @@
(define_insn "neon_vaba<mode>"
[(set (match_operand:VDQIW 0 "s_register_operand" "=w")
- (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "0")
- (match_operand:VDQIW 2 "s_register_operand" "w")
- (match_operand:VDQIW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VABA))]
+ (plus:VDQIW (match_operand:VDQIW 1 "s_register_operand" "0")
+ (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:VDQIW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VABD)))]
"TARGET_NEON"
"vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
@@ -2174,11 +2288,11 @@
(define_insn "neon_vabal<mode>"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
- (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
- (match_operand:VW 2 "s_register_operand" "w")
- (match_operand:VW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VABAL))]
+ (plus:<V_widen> (match_operand:<V_widen> 1 "s_register_operand" "0")
+ (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:VW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VABDL)))]
"TARGET_NEON"
"vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
[(set_attr "neon_type" "neon_vaba")]
@@ -2309,22 +2423,15 @@
(const_string "neon_fp_vrecps_vrsqrts_qqq")))]
)
-(define_insn "neon_vabs<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VABS))]
+(define_expand "neon_vabs<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "")
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
"TARGET_NEON"
- "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
- [(set (attr "neon_type")
- (if_then_else (ior (ne (symbol_ref "<Is_float_mode>") (const_int 0))
- (ne (symbol_ref "<Is_float_mode>") (const_int 0)))
- (if_then_else
- (ne (symbol_ref "<Is_d_reg>") (const_int 0))
- (const_string "neon_fp_vadd_ddd_vabs_dd")
- (const_string "neon_fp_vadd_qqq_vabs_qq"))
- (const_string "neon_vqneg_vqabs")))]
-)
+{
+ emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
+ DONE;
+})
(define_insn "neon_vqabs<mode>"
[(set (match_operand:VDQIW 0 "s_register_operand" "=w")
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index d282c83..b5b9cab 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -709,7 +709,8 @@ let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
let ops =
[
(* Addition. *)
- Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
+ Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
+ Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
@@ -758,7 +759,8 @@ let ops =
Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
(* Subtraction. *)
- Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
+ Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
+ Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
diff --git a/gcc/doc/arm-neon-intrinsics.texi b/gcc/doc/arm-neon-intrinsics.texi
index d21fabd..a75e582 100644
--- a/gcc/doc/arm-neon-intrinsics.texi
+++ b/gcc/doc/arm-neon-intrinsics.texi
@@ -43,20 +43,18 @@
@itemize @bullet
-@item uint64x1_t vadd_u64 (uint64x1_t, uint64x1_t)
-@*@emph{Form of expected instruction(s):} @code{vadd.i64 @var{d0}, @var{d0}, @var{d0}}
+@item float32x2_t vadd_f32 (float32x2_t, float32x2_t)
+@*@emph{Form of expected instruction(s):} @code{vadd.f32 @var{d0}, @var{d0}, @var{d0}}
@end itemize
@itemize @bullet
-@item int64x1_t vadd_s64 (int64x1_t, int64x1_t)
-@*@emph{Form of expected instruction(s):} @code{vadd.i64 @var{d0}, @var{d0}, @var{d0}}
+@item uint64x1_t vadd_u64 (uint64x1_t, uint64x1_t)
@end itemize
@itemize @bullet
-@item float32x2_t vadd_f32 (float32x2_t, float32x2_t)
-@*@emph{Form of expected instruction(s):} @code{vadd.f32 @var{d0}, @var{d0}, @var{d0}}
+@item int64x1_t vadd_s64 (int64x1_t, int64x1_t)
@end itemize
@@ -1013,20 +1011,18 @@
@itemize @bullet
-@item uint64x1_t vsub_u64 (uint64x1_t, uint64x1_t)
-@*@emph{Form of expected instruction(s):} @code{vsub.i64 @var{d0}, @var{d0}, @var{d0}}
+@item float32x2_t vsub_f32 (float32x2_t, float32x2_t)
+@*@emph{Form of expected instruction(s):} @code{vsub.f32 @var{d0}, @var{d0}, @var{d0}}
@end itemize
@itemize @bullet
-@item int64x1_t vsub_s64 (int64x1_t, int64x1_t)
-@*@emph{Form of expected instruction(s):} @code{vsub.i64 @var{d0}, @var{d0}, @var{d0}}
+@item uint64x1_t vsub_u64 (uint64x1_t, uint64x1_t)
@end itemize
@itemize @bullet
-@item float32x2_t vsub_f32 (float32x2_t, float32x2_t)
-@*@emph{Form of expected instruction(s):} @code{vsub.f32 @var{d0}, @var{d0}, @var{d0}}
+@item int64x1_t vsub_s64 (int64x1_t, int64x1_t)
@end itemize
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7de49b0..f6c1ca0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,4 +1,18 @@
2010-07-02 Sandra Loosemore <sandra@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+
+ * gcc.target/arm/neon/vadds64.c: Regenerated.
+ * gcc.target/arm/neon/vaddu64.c: Regenerated.
+ * gcc.target/arm/neon/vsubs64.c: Regenerated.
+ * gcc.target/arm/neon/vsubu64.c: Regenerated.
+ * gcc.target/arm/neon-vmla-1.c: Add -ffast-math to options.
+ * gcc.target/arm/neon-vmls-1.c: Likewise.
+ * gcc.target/arm/neon-vsubs64.c: New execution test.
+ * gcc.target/arm/neon-vsubu64.c: New execution test.
+ * gcc.target/arm/neon-vadds64.c: New execution test.
+ * gcc.target/arm/neon-vaddu64.c: New execution test.
+
+2010-07-02 Sandra Loosemore <sandra@codesourcery.com>
* gcc.target/arm/neon-vands64.c: New.
* gcc.target/arm/neon-vandu64.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/neon-vadds64.c b/gcc/testsuite/gcc.target/arm/neon-vadds64.c
new file mode 100644
index 0000000..284a1d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vadds64.c
@@ -0,0 +1,21 @@
+/* Test the `vadd_s64' ARM Neon intrinsic. */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+ int64x1_t out_int64x1_t = 0;
+ int64x1_t arg0_int64x1_t = (int64x1_t)0xdeadbeef00000000LL;
+ int64x1_t arg1_int64x1_t = (int64x1_t)0x00000000deadbeefLL;
+
+ out_int64x1_t = vadd_s64 (arg0_int64x1_t, arg1_int64x1_t);
+ if (out_int64x1_t != (int64x1_t)0xdeadbeefdeadbeefLL)
+ abort();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddu64.c b/gcc/testsuite/gcc.target/arm/neon-vaddu64.c
new file mode 100644
index 0000000..05bda8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddu64.c
@@ -0,0 +1,21 @@
+/* Test the `vadd_u64' ARM Neon intrinsic. */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+ uint64x1_t out_uint64x1_t = 0;
+ uint64x1_t arg0_uint64x1_t = (uint64x1_t)0xdeadbeef00000000LL;
+ uint64x1_t arg1_uint64x1_t = (uint64x1_t)0x00000000deadbeefLL;
+
+ out_uint64x1_t = vadd_u64 (arg0_uint64x1_t, arg1_uint64x1_t);
+ if (out_uint64x1_t != (uint64x1_t)0xdeadbeefdeadbeefLL)
+ abort();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c
index 336a53b..9d239ed 100644
--- a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c
+++ b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c
@@ -1,5 +1,5 @@
/* { dg-require-effective-target arm_neon_hw } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
/* { dg-add-options arm_neon } */
/* { dg-final { scan-assembler "vmla\\.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c
index 5e5e0c7..2beaebe 100644
--- a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c
+++ b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c
@@ -1,5 +1,5 @@
/* { dg-require-effective-target arm_neon_hw } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
/* { dg-add-options arm_neon } */
/* { dg-final { scan-assembler "vmls\\.f32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vsubs64.c b/gcc/testsuite/gcc.target/arm/neon-vsubs64.c
new file mode 100644
index 0000000..2394700
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vsubs64.c
@@ -0,0 +1,21 @@
+/* Test the `vsub_s64' ARM Neon intrinsic. */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+ int64x1_t out_int64x1_t = 0;
+ int64x1_t arg0_int64x1_t = (int64x1_t)0xdeadbeefdeadbeefLL;
+ int64x1_t arg1_int64x1_t = (int64x1_t)0x0000beefdead0000LL;
+
+ out_int64x1_t = vsub_s64 (arg0_int64x1_t, arg1_int64x1_t);
+ if (out_int64x1_t != (int64x1_t)0xdead00000000beefLL)
+ abort();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vsubu64.c b/gcc/testsuite/gcc.target/arm/neon-vsubu64.c
new file mode 100644
index 0000000..0162e20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vsubu64.c
@@ -0,0 +1,21 @@
+/* Test the `vsub_u64' ARM Neon intrinsic. */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+ uint64x1_t out_uint64x1_t = 0;
+ uint64x1_t arg0_uint64x1_t = (uint64x1_t)0xdeadbeefdeadbeefLL;
+ uint64x1_t arg1_uint64x1_t = (uint64x1_t)0x0000beefdead0000LL;
+
+ out_uint64x1_t = vsub_u64 (arg0_uint64x1_t, arg1_uint64x1_t);
+ if (out_uint64x1_t != (uint64x1_t)0xdead00000000beefLL)
+ abort();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vadds64.c b/gcc/testsuite/gcc.target/arm/neon/vadds64.c
index d392377..fb17e0e 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vadds64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vadds64.c
@@ -17,5 +17,4 @@ void test_vadds64 (void)
out_int64x1_t = vadd_s64 (arg0_int64x1_t, arg1_int64x1_t);
}
-/* { dg-final { scan-assembler "vadd\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vaddu64.c b/gcc/testsuite/gcc.target/arm/neon/vaddu64.c
index 1114725..18fc500 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vaddu64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vaddu64.c
@@ -17,5 +17,4 @@ void test_vaddu64 (void)
out_uint64x1_t = vadd_u64 (arg0_uint64x1_t, arg1_uint64x1_t);
}
-/* { dg-final { scan-assembler "vadd\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vsubs64.c b/gcc/testsuite/gcc.target/arm/neon/vsubs64.c
index 6560399..57bcd33 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vsubs64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vsubs64.c
@@ -17,5 +17,4 @@ void test_vsubs64 (void)
out_int64x1_t = vsub_s64 (arg0_int64x1_t, arg1_int64x1_t);
}
-/* { dg-final { scan-assembler "vsub\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vsubu64.c b/gcc/testsuite/gcc.target/arm/neon/vsubu64.c
index 5e4a2a8..3a8ae46 100644
--- a/gcc/testsuite/gcc.target/arm/neon/vsubu64.c
+++ b/gcc/testsuite/gcc.target/arm/neon/vsubu64.c
@@ -17,5 +17,4 @@ void test_vsubu64 (void)
out_uint64x1_t = vsub_u64 (arg0_uint64x1_t, arg1_uint64x1_t);
}
-/* { dg-final { scan-assembler "vsub\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */