author | Sandra Loosemore <sandra@codesourcery.com> | 2010-07-02 20:31:43 -0400
committer | Sandra Loosemore <sandra@gcc.gnu.org> | 2010-07-02 20:31:43 -0400
commit | bab53516d0a58120f1d3b24aabc09a3d7e6443aa (patch)
tree | 716708677aed26e5c65cf3ff167cec3906175513
parent | b614e5669ebff10b7ec9b5fff4e539ab1416e38b (diff)
neon.md (UNSPEC_VABA): Delete.
2010-07-02 Sandra Loosemore <sandra@codesourcery.com>
Julian Brown <julian@codesourcery.com>
gcc/
* config/arm/neon.md (UNSPEC_VABA): Delete.
(UNSPEC_VABAL): Delete.
(UNSPEC_VABS): Delete.
(UNSPEC_VMUL_N): Delete.
(adddi3_neon): New.
(subdi3_neon): New.
(mul<mode>3add<mode>_neon): Make the pattern named.
(mul<mode>3neg<mode>add<mode>_neon): Likewise.
(neon_vadd<mode>): Replace with define_expand, and move the remaining
unspec parts...
(neon_vadd<mode>_unspec): ...to this.
(neon_vmla<mode>, neon_vmla<mode>_unspec): Likewise.
(neon_vmls<mode>, neon_vmls<mode>_unspec): Likewise.
(neon_vsub<mode>, neon_vsub<mode>_unspec): Likewise.
(neon_vaba<mode>): Rewrite in terms of vabd.
(neon_vabal<mode>): Rewrite in terms of vabdl.
(neon_vabs<mode>): Rewrite without unspec.
* config/arm/arm.md (*arm_adddi3): Disable for TARGET_NEON.
(*arm_subdi3): Likewise.
* config/arm/neon.ml (Vadd, Vsub): Split out 64-bit variants and add
No_op attribute to disable assembly output checks.
* config/arm/arm_neon.h: Regenerated.
* doc/arm-neon-intrinsics.texi: Regenerated.
gcc/testsuite/
* gcc.target/arm/neon/vadds64.c: Regenerated.
* gcc.target/arm/neon/vaddu64.c: Regenerated.
* gcc.target/arm/neon/vsubs64.c: Regenerated.
* gcc.target/arm/neon/vsubu64.c: Regenerated.
* gcc.target/arm/neon-vmla-1.c: Add -ffast-math to options.
* gcc.target/arm/neon-vmls-1.c: Likewise.
* gcc.target/arm/neon-vsubs64.c: New execution test.
* gcc.target/arm/neon-vsubu64.c: New execution test.
* gcc.target/arm/neon-vadds64.c: New execution test.
* gcc.target/arm/neon-vaddu64.c: New execution test.
Co-Authored-By: Julian Brown <julian@codesourcery.com>
From-SVN: r161762
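
For illustration, here is a minimal sketch of the 64-bit intrinsics this patch reroutes through the new adddi3_neon/subdi3_neon patterns. It is modelled on the new neon-vadds64.c/neon-vsubs64.c execution tests added below, not copied from them, and the NEON-enabling compiler flags mentioned in the comment are assumptions rather than part of the patch:

/* Compile with NEON enabled, e.g. -mfpu=neon -mfloat-abi=softfp (assumed flags).  */
#include <arm_neon.h>
#include <stdlib.h>

int
main (void)
{
  int64x1_t a = (int64x1_t) 0xdeadbeef00000000LL;
  int64x1_t b = (int64x1_t) 0x00000000deadbeefLL;

  /* After this patch the 64-bit add/sub intrinsics may be emitted as
     vadd.i64/vsub.i64 when the operands are allocated to NEON registers,
     or split onto core registers otherwise; no particular instruction
     sequence is guaranteed, which is why the regenerated vadds64.c and
     vsubs64.c tests drop their scan-assembler checks.  */
  int64x1_t sum  = vadd_s64 (a, b);
  int64x1_t diff = vsub_s64 (sum, b);

  if (sum != (int64x1_t) 0xdeadbeefdeadbeefLL || diff != a)
    abort ();
  return 0;
}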
-rw-r--r-- | gcc/ChangeLog | 27
-rw-r--r-- | gcc/config/arm/arm.md | 7
-rw-r--r-- | gcc/config/arm/arm_neon.h | 24
-rw-r--r-- | gcc/config/arm/neon.md | 219
-rw-r--r-- | gcc/config/arm/neon.ml | 6
-rw-r--r-- | gcc/doc/arm-neon-intrinsics.texi | 20
-rw-r--r-- | gcc/testsuite/ChangeLog | 14
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vadds64.c | 21
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddu64.c | 21
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vmla-1.c | 2
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vmls-1.c | 2
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vsubs64.c | 21
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vsubu64.c | 21
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vadds64.c | 1
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vaddu64.c | 1
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vsubs64.c | 1
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon/vsubu64.c | 1
17 files changed, 318 insertions, 91 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 529d635..3943f8f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +2010-07-02 Sandra Loosemore <sandra@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * config/arm/neon.md (UNSPEC_VABA): Delete. + (UNSPEC_VABAL): Delete. + (UNSPEC_VABS): Delete. + (UNSPEC_VMUL_N): Delete. + (adddi3_neon): New. + (subdi3_neon): New. + (mul<mode>3add<mode>_neon): Make the pattern named. + (mul<mode>3neg<mode>add<mode>_neon): Likewise. + (neon_vadd<mode>): Replace with define_expand, and move the remaining + unspec parts... + (neon_vadd<mode>_unspec): ...to this. + (neon_vmla<mode>, neon_vmla<mode>_unspec): Likewise. + (neon_vlms<mode>, neon_vmls<mode>_unspec): Likewise. + (neon_vsub<mode>, neon_vsub<mode>_unspec): Likewise. + (neon_vaba<mode>): Rewrite in terms of vabd. + (neon_vabal<mode>): Rewrite in terms of vabdl. + (neon_vabs<mode>): Rewrite without unspec. + * config/arm/arm.md (*arm_adddi3): Disable for TARGET_NEON. + (*arm_subdi3): Likewise. + * config/arm/neon.ml (Vadd, Vsub): Split out 64-bit variants and add + No_op attribute to disable assembly output checks. + * config/arm/arm_neon.h: Regenerated. + * doc/arm-neon-intrinsics.texi: Regenerated. + 2010-07-02 Jan Hubicka <jh@suse.cz> * ipa-split.c (split_function): For aggregate values set return_slot_opt; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index f490966..fbbe7f4 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -495,9 +495,10 @@ (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0") (match_operand:DI 2 "s_register_operand" "r, 0"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)" + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON" "#" - "TARGET_32BIT && reload_completed" + "TARGET_32BIT && reload_completed + && ! 
(TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))" [(parallel [(set (reg:CC_C CC_REGNUM) (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1))) @@ -994,7 +995,7 @@ (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0") (match_operand:DI 2 "s_register_operand" "r,0,0"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NEON" "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2" [(set_attr "conds" "clob") (set_attr "length" "8")] diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index a6085bb..9cba0a9 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -414,12 +414,6 @@ vadd_s32 (int32x2_t __a, int32x2_t __b) return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vadd_s64 (int64x1_t __a, int64x1_t __b) -{ - return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vadd_f32 (float32x2_t __a, float32x2_t __b) { @@ -444,6 +438,12 @@ vadd_u32 (uint32x2_t __a, uint32x2_t __b) return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); } +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vadd_u64 (uint64x1_t __a, uint64x1_t __b) { @@ -1368,12 +1368,6 @@ vsub_s32 (int32x2_t __a, int32x2_t __b) return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vsub_s64 (int64x1_t __a, int64x1_t __b) -{ - return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vsub_f32 (float32x2_t __a, float32x2_t __b) { @@ -1398,6 +1392,12 @@ vsub_u32 (uint32x2_t __a, uint32x2_t __b) return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); } +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsub_u64 (uint64x1_t __a, uint64x1_t __b) { diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 1b25842..48965ca 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -22,11 +22,8 @@ (define_constants [(UNSPEC_ASHIFT_SIGNED 65) (UNSPEC_ASHIFT_UNSIGNED 66) - (UNSPEC_VABA 67) - (UNSPEC_VABAL 68) (UNSPEC_VABD 69) (UNSPEC_VABDL 70) - (UNSPEC_VABS 71) (UNSPEC_VADD 72) (UNSPEC_VADDHN 73) (UNSPEC_VADDL 74) @@ -75,7 +72,6 @@ (UNSPEC_VMULL 128) (UNSPEC_VMUL_LANE 129) (UNSPEC_VMULL_LANE 130) - (UNSPEC_VMUL_N 131) (UNSPEC_VPADAL 135) (UNSPEC_VPADD 136) (UNSPEC_VPADDL 137) @@ -816,11 +812,8 @@ ;; Doubleword and quadword arithmetic. -;; NOTE: vadd/vsub and some other instructions also support 64-bit integer -;; element size, which we could potentially use for "long long" operations. We -;; don't want to do this at present though, because moving values from the -;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is -;; easy to do with ARM instructions anyway. +;; NOTE: some other instructions also support 64-bit integer +;; element size, which we could potentially use for "long long" operations. 
(define_insn "*add<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") @@ -836,6 +829,26 @@ (const_string "neon_int_1")))] ) +(define_insn "adddi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0") + (match_operand:DI 2 "s_register_operand" "w,r,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vadd.i64\t%P0, %P1, %P2"; + case 1: return "#"; + case 2: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "conds" "*,clob,clob") + (set_attr "length" "*,8,8")] +) + (define_insn "*sub<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") @@ -850,6 +863,27 @@ (const_string "neon_int_2")))] ) +(define_insn "subdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0") + (match_operand:DI 2 "s_register_operand" "w,r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vsub.i64\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 2: /* fall through */ + case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_2,*,*,*") + (set_attr "conds" "*,clob,clob,clob") + (set_attr "length" "*,8,8,8")] +) + (define_insn "*mul<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w") @@ -871,7 +905,7 @@ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] ) -(define_insn "*mul<mode>3add<mode>_neon" +(define_insn "mul<mode>3add<mode>_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") (match_operand:VDQ 3 "s_register_operand" "w")) @@ -893,7 +927,7 @@ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] ) -(define_insn "*mul<mode>3neg<mode>add<mode>_neon" +(define_insn "mul<mode>3neg<mode>add<mode>_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0") (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") @@ -1718,11 +1752,37 @@ ; good for plain vadd, vaddq. -(define_insn "neon_vadd<mode>" +(define_expand "neon_vadd<mode>" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Note that NEON operations don't support the full IEEE 754 standard: in +; particular, denormal values are flushed to zero. This means that GCC cannot +; use those instructions for autovectorization, etc. unless +; -funsafe-math-optimizations is in effect (in which case flush-to-zero +; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h +; header) must work in either case: if -funsafe-math-optimizations is given, +; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics +; expand to unspecs (which may potentially limit the extent to which they might +; be optimized by generic code). 
+ +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vadd<mode>_unspec" [(set (match_operand:VDQX 0 "s_register_operand" "=w") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] + (match_operand:VDQX 2 "s_register_operand" "w")] UNSPEC_VADD))] "TARGET_NEON" "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" @@ -1795,6 +1855,8 @@ [(set_attr "neon_type" "neon_int_4")] ) +;; We cannot replace this unspec with mul<mode>3 because of the odd +;; polynomial multiplication case that can specified by operand 3. (define_insn "neon_vmul<mode>" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") @@ -1818,13 +1880,31 @@ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] ) -(define_insn "neon_vmla<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:VDQW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VMLA))] +(define_expand "neon_vmla<mode>" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vmla<mode>_unspec" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0") + (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")] + UNSPEC_VMLA))] "TARGET_NEON" "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") @@ -1857,13 +1937,31 @@ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] ) -(define_insn "neon_vmls<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:VDQW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VMLS))] +(define_expand "neon_vmls<mode>" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], + operands[1], operands[2], operands[3])); + else + emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
+ +(define_insn "neon_vmls<mode>_unspec" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0") + (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")] + UNSPEC_VMLS))] "TARGET_NEON" "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") @@ -1973,11 +2071,27 @@ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] ) -(define_insn "neon_vsub<mode>" +(define_expand "neon_vsub<mode>" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vsub<mode>_unspec" [(set (match_operand:VDQX 0 "s_register_operand" "=w") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] + (match_operand:VDQX 2 "s_register_operand" "w")] UNSPEC_VSUB))] "TARGET_NEON" "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" @@ -2160,11 +2274,11 @@ (define_insn "neon_vaba<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "0") - (match_operand:VDQIW 2 "s_register_operand" "w") - (match_operand:VDQIW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VABA))] + (plus:VDQIW (match_operand:VDQIW 1 "s_register_operand" "0") + (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:VDQIW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABD)))] "TARGET_NEON" "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") @@ -2174,11 +2288,11 @@ (define_insn "neon_vabal<mode>" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") - (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") - (match_operand:VW 2 "s_register_operand" "w") - (match_operand:VW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VABAL))] + (plus:<V_widen> (match_operand:<V_widen> 1 "s_register_operand" "0") + (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABDL)))] "TARGET_NEON" "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3" [(set_attr "neon_type" "neon_vaba")] @@ -2309,22 +2423,15 @@ (const_string "neon_fp_vrecps_vrsqrts_qqq")))] ) -(define_insn "neon_vabs<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VABS))] +(define_expand "neon_vabs<mode>" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] "TARGET_NEON" - "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") - (if_then_else (ior (ne (symbol_ref "<Is_float_mode>") (const_int 0)) - (ne (symbol_ref "<Is_float_mode>") (const_int 0))) - (if_then_else - (ne (symbol_ref 
"<Is_d_reg>") (const_int 0)) - (const_string "neon_fp_vadd_ddd_vabs_dd") - (const_string "neon_fp_vadd_qqq_vabs_qq")) - (const_string "neon_vqneg_vqabs")))] -) +{ + emit_insn (gen_abs<mode>2 (operands[0], operands[1])); + DONE; +}) (define_insn "neon_vqabs<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index d282c83..b5b9cab 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -709,7 +709,8 @@ let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64 let ops = [ (* Addition. *) - Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64; + Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32; + Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64]; Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; Vadd, [], Long, "vaddl", elts_same_2, su_8_32; Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; @@ -758,7 +759,8 @@ let ops = Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; (* Subtraction. *) - Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64; + Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32; + Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64]; Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; Vsub, [], Long, "vsubl", elts_same_2, su_8_32; Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; diff --git a/gcc/doc/arm-neon-intrinsics.texi b/gcc/doc/arm-neon-intrinsics.texi index d21fabd..a75e582 100644 --- a/gcc/doc/arm-neon-intrinsics.texi +++ b/gcc/doc/arm-neon-intrinsics.texi @@ -43,20 +43,18 @@ @itemize @bullet -@item uint64x1_t vadd_u64 (uint64x1_t, uint64x1_t) -@*@emph{Form of expected instruction(s):} @code{vadd.i64 @var{d0}, @var{d0}, @var{d0}} +@item float32x2_t vadd_f32 (float32x2_t, float32x2_t) +@*@emph{Form of expected instruction(s):} @code{vadd.f32 @var{d0}, @var{d0}, @var{d0}} @end itemize @itemize @bullet -@item int64x1_t vadd_s64 (int64x1_t, int64x1_t) -@*@emph{Form of expected instruction(s):} @code{vadd.i64 @var{d0}, @var{d0}, @var{d0}} +@item uint64x1_t vadd_u64 (uint64x1_t, uint64x1_t) @end itemize @itemize @bullet -@item float32x2_t vadd_f32 (float32x2_t, float32x2_t) -@*@emph{Form of expected instruction(s):} @code{vadd.f32 @var{d0}, @var{d0}, @var{d0}} +@item int64x1_t vadd_s64 (int64x1_t, int64x1_t) @end itemize @@ -1013,20 +1011,18 @@ @itemize @bullet -@item uint64x1_t vsub_u64 (uint64x1_t, uint64x1_t) -@*@emph{Form of expected instruction(s):} @code{vsub.i64 @var{d0}, @var{d0}, @var{d0}} +@item float32x2_t vsub_f32 (float32x2_t, float32x2_t) +@*@emph{Form of expected instruction(s):} @code{vsub.f32 @var{d0}, @var{d0}, @var{d0}} @end itemize @itemize @bullet -@item int64x1_t vsub_s64 (int64x1_t, int64x1_t) -@*@emph{Form of expected instruction(s):} @code{vsub.i64 @var{d0}, @var{d0}, @var{d0}} +@item uint64x1_t vsub_u64 (uint64x1_t, uint64x1_t) @end itemize @itemize @bullet -@item float32x2_t vsub_f32 (float32x2_t, float32x2_t) -@*@emph{Form of expected instruction(s):} @code{vsub.f32 @var{d0}, @var{d0}, @var{d0}} +@item int64x1_t vsub_s64 (int64x1_t, int64x1_t) @end itemize diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7de49b0..f6c1ca0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,4 +1,18 @@ 2010-07-02 Sandra Loosemore <sandra@codesourcery.com> + Julian Brown <julian@codesourcery.com> + + * gcc.target/arm/neon/vadds64.c: Regenerated. + * gcc.target/arm/neon/vaddu64.c: Regenerated. + * gcc.target/arm/neon/vsubs64.c: Regenerated. 
+ * gcc.target/arm/neon/vsubu64.c: Regenerated. + * gcc.target/arm/neon-vmla-1.c: Add -ffast-math to options. + * gcc.target/arm/neon-vmls-1.c: Likewise. + * gcc.target/arm/neon-vsubs64.c: New execution test. + * gcc.target/arm/neon-vsubu64.c: New execution test. + * gcc.target/arm/neon-vadds64.c: New execution test. + * gcc.target/arm/neon-vaddu64.c: New execution test. + +2010-07-02 Sandra Loosemore <sandra@codesourcery.com> * gcc.target/arm/neon-vands64.c: New. * gcc.target/arm/neon-vandu64.c: New. diff --git a/gcc/testsuite/gcc.target/arm/neon-vadds64.c b/gcc/testsuite/gcc.target/arm/neon-vadds64.c new file mode 100644 index 0000000..284a1d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vadds64.c @@ -0,0 +1,21 @@ +/* Test the `vadd_s64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O0" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main (void) +{ + int64x1_t out_int64x1_t = 0; + int64x1_t arg0_int64x1_t = (int64x1_t)0xdeadbeef00000000LL; + int64x1_t arg1_int64x1_t = (int64x1_t)0x00000000deadbeefLL; + + out_int64x1_t = vadd_s64 (arg0_int64x1_t, arg1_int64x1_t); + if (out_int64x1_t != (int64x1_t)0xdeadbeefdeadbeefLL) + abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddu64.c b/gcc/testsuite/gcc.target/arm/neon-vaddu64.c new file mode 100644 index 0000000..05bda8b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vaddu64.c @@ -0,0 +1,21 @@ +/* Test the `vadd_u64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O0" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main (void) +{ + uint64x1_t out_uint64x1_t = 0; + uint64x1_t arg0_uint64x1_t = (uint64x1_t)0xdeadbeef00000000LL; + uint64x1_t arg1_uint64x1_t = (uint64x1_t)0x00000000deadbeefLL; + + out_uint64x1_t = vadd_u64 (arg0_uint64x1_t, arg1_uint64x1_t); + if (out_uint64x1_t != (uint64x1_t)0xdeadbeefdeadbeefLL) + abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c index 336a53b..9d239ed 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c +++ b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c @@ -1,5 +1,5 @@ /* { dg-require-effective-target arm_neon_hw } */ -/* { dg-options "-O2 -ftree-vectorize" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ /* { dg-add-options arm_neon } */ /* { dg-final { scan-assembler "vmla\\.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c index 5e5e0c7..2beaebe 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c +++ b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c @@ -1,5 +1,5 @@ /* { dg-require-effective-target arm_neon_hw } */ -/* { dg-options "-O2 -ftree-vectorize" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ /* { dg-add-options arm_neon } */ /* { dg-final { scan-assembler "vmls\\.f32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vsubs64.c b/gcc/testsuite/gcc.target/arm/neon-vsubs64.c new file mode 100644 index 0000000..2394700 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vsubs64.c @@ -0,0 +1,21 @@ +/* Test the `vsub_s64' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O0" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main (void) +{ + int64x1_t out_int64x1_t = 0; + int64x1_t arg0_int64x1_t = (int64x1_t)0xdeadbeefdeadbeefLL; + int64x1_t arg1_int64x1_t = (int64x1_t)0x0000beefdead0000LL; + + out_int64x1_t = vsub_s64 (arg0_int64x1_t, arg1_int64x1_t); + if (out_int64x1_t != (int64x1_t)0xdead00000000beefLL) + abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon-vsubu64.c b/gcc/testsuite/gcc.target/arm/neon-vsubu64.c new file mode 100644 index 0000000..0162e20 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vsubu64.c @@ -0,0 +1,21 @@ +/* Test the `vsub_u64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O0" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main (void) +{ + uint64x1_t out_uint64x1_t = 0; + uint64x1_t arg0_uint64x1_t = (uint64x1_t)0xdeadbeefdeadbeefLL; + uint64x1_t arg1_uint64x1_t = (uint64x1_t)0x0000beefdead0000LL; + + out_uint64x1_t = vsub_u64 (arg0_uint64x1_t, arg1_uint64x1_t); + if (out_uint64x1_t != (uint64x1_t)0xdead00000000beefLL) + abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/neon/vadds64.c b/gcc/testsuite/gcc.target/arm/neon/vadds64.c index d392377..fb17e0e 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vadds64.c +++ b/gcc/testsuite/gcc.target/arm/neon/vadds64.c @@ -17,5 +17,4 @@ void test_vadds64 (void) out_int64x1_t = vadd_s64 (arg0_int64x1_t, arg1_int64x1_t); } -/* { dg-final { scan-assembler "vadd\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vaddu64.c b/gcc/testsuite/gcc.target/arm/neon/vaddu64.c index 1114725..18fc500 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vaddu64.c +++ b/gcc/testsuite/gcc.target/arm/neon/vaddu64.c @@ -17,5 +17,4 @@ void test_vaddu64 (void) out_uint64x1_t = vadd_u64 (arg0_uint64x1_t, arg1_uint64x1_t); } -/* { dg-final { scan-assembler "vadd\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vsubs64.c b/gcc/testsuite/gcc.target/arm/neon/vsubs64.c index 6560399..57bcd33 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vsubs64.c +++ b/gcc/testsuite/gcc.target/arm/neon/vsubs64.c @@ -17,5 +17,4 @@ void test_vsubs64 (void) out_int64x1_t = vsub_s64 (arg0_int64x1_t, arg1_int64x1_t); } -/* { dg-final { scan-assembler "vsub\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vsubu64.c b/gcc/testsuite/gcc.target/arm/neon/vsubu64.c index 5e4a2a8..3a8ae46 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vsubu64.c +++ b/gcc/testsuite/gcc.target/arm/neon/vsubu64.c @@ -17,5 +17,4 @@ void test_vsubu64 (void) out_uint64x1_t = vsub_u64 (arg0_uint64x1_t, arg1_uint64x1_t); } -/* { dg-final { scan-assembler "vsub\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ |
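
The vmla/vmls testsuite changes above only add -ffast-math to the options. The kernel below is an illustrative sketch (assumed, not taken from neon-vmla-1.c) of why that flag is now needed: as the comment added to neon.md explains, NEON flushes denormals to zero, so the canonical multiply-accumulate patterns are only used when -funsafe-math-optimizations is in effect, and the vectorizer can only form vmla.f32 under -ffast-math:

/* Expected to vectorize to vmla.f32 with "-O2 -ftree-vectorize -ffast-math"
   plus NEON enabled (assumed flags); without -ffast-math the multiply and
   add are expected to stay separate, since the intrinsic-only unspec
   patterns are not available to the vectorizer.  */
void
fmla_kernel (float *__restrict dst, const float *__restrict a,
             const float *__restrict b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] += a[i] * b[i];  /* multiply-accumulate candidate */
}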