diff options
author | Ulrich Weigand <ulrich.weigand@linaro.org> | 2011-04-18 07:14:22 +0000 |
---|---|---|
committer | Ira Rosen <irar@gcc.gnu.org> | 2011-04-18 07:14:22 +0000 |
commit | 7e7cfcf6eb7ed4bbc1cc7d8c4f689fa72cc7deb0 (patch) | |
tree | 82fa305bb3e3f3e1635e6e51c7bf01c0d5a1cbfb | |
parent | 49eab32e6e79af5ef80832d058539c8d73a74ef9 (diff) | |
download | gcc-7e7cfcf6eb7ed4bbc1cc7d8c4f689fa72cc7deb0.zip gcc-7e7cfcf6eb7ed4bbc1cc7d8c4f689fa72cc7deb0.tar.gz gcc-7e7cfcf6eb7ed4bbc1cc7d8c4f689fa72cc7deb0.tar.bz2 |
re PR target/48252 (ARM neon: problem with consecutive vzip, vuzp and vtrn)
PR target/48252
* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
to match neon_vzip/vuzp/vtrn_internal.
* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
outputs explicitly dependent on both inputs.
(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
Co-Authored-By: Ira Rosen <ira.rosen@linaro.org>
From-SVN: r172639
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 2 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 41 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/pr48252.c | 31 |
5 files changed, 70 insertions, 20 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5963d44..30398d2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2011-04-18 Ulrich Weigand <ulrich.weigand@linaro.org>
+           Ira Rosen <ira.rosen@linaro.org>
+
+        PR target/48252
+        * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
+        to match neon_vzip/vuzp/vtrn_internal.
+        * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
+        outputs explicitly dependent on both inputs.
+        (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
+
 2011-04-18 Jakub Jelinek <jakub@redhat.com>
 
         PR tree-optimization/48616
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index dc45eb0..aa13e2b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -19632,7 +19632,7 @@ neon_emit_pair_result_insn (enum machine_mode mode,
   rtx tmp1 = gen_reg_rtx (mode);
   rtx tmp2 = gen_reg_rtx (mode);
 
-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));
 
   emit_move_insn (mem, tmp1);
   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 8231782..a975d97 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -4092,13 +4092,14 @@
 
 (define_insn "neon_vtrn<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                     UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                     UNSPEC_VTRN2))]
-  "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
+  "TARGET_NEON"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -4118,13 +4119,14 @@
 
 (define_insn "neon_vzip<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                     UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                     UNSPEC_VZIP2))]
-  "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
+  "TARGET_NEON"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -4144,13 +4146,14 @@
 
 (define_insn "neon_vuzp<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                      UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                     UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b95bb3f..f7d9784 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2011-04-18 Ulrich Weigand <ulrich.weigand@linaro.org>
+           Ira Rosen <ira.rosen@linaro.org>
+
+        PR target/48252
+        * gcc.target/arm/pr48252.c: New test.
+
 2011-04-18 Jakub Jelinek <jakub@redhat.com>
 
         PR tree-optimization/48616
diff --git a/gcc/testsuite/gcc.target/arm/pr48252.c b/gcc/testsuite/gcc.target/arm/pr48252.c
new file mode 100644
index 0000000..1a06c71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr48252.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main(void)
+{
+  uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+  uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+  uint8x8x2_t vd1, vd2;
+  union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+  int i;
+
+  vd1 = vzip_u8(v1, vdup_n_u8(0));
+  vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+  vst1_u8(d1.buf, vd1.val[0]);
+  vst1_u8(d2.buf, vd1.val[1]);
+  vst1_u8(d3.buf, vd2.val[0]);
+  vst1_u8(d4.buf, vd2.val[1]);
+
+  for (i = 0; i < 8; i++)
+    if ((i % 2 == 0 && d4.buf[i] != 2)
+        || (i % 2 == 1 && d4.buf[i] != 0))
+      abort ();
+
+  return 0;
+}