diff options
author | Jakub Jelinek <jakub@redhat.com> | 2014-10-03 20:16:09 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2014-10-03 20:16:09 +0200 |
commit | 76e06b73eeb9c3c01a6e360825454e1810c96b55 (patch) | |
tree | 2c961bb0d0ad39f2f4cb513d06c39347a9bd3159 | |
parent | 7705dfd11ffaabc5eeb7e7da66e138258a6594ea (diff) | |
download | gcc-76e06b73eeb9c3c01a6e360825454e1810c96b55.zip gcc-76e06b73eeb9c3c01a6e360825454e1810c96b55.tar.gz gcc-76e06b73eeb9c3c01a6e360825454e1810c96b55.tar.bz2 |
re PR tree-optimization/61403 (An opportunity for x86 gcc vectorizer (~40% gain))
PR tree-optimization/61403
* config/i386/i386.c (expand_vec_perm_palignr): Fix a spelling
error in comment. Also optimize 256-bit vectors for AVX2
or AVX (floating vectors only), provided the first permutation
can be performed in one insn.
* gcc.dg/torture/vshuf-32.inc: Add a new test 29.
Co-Authored-By: Uros Bizjak <ubizjak@gmail.com>
From-SVN: r215866
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/torture/vshuf-32.inc | 3 |
4 files changed, 30 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8d2521e..708dca1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2014-10-03 Jakub Jelinek <jakub@redhat.com> + Uros Bizjak <ubizjak@gmail.com> + + PR tree-optimization/61403 + * config/i386/i386.c (expand_vec_perm_palignr): Fix a spelling + error in comment. Also optimize 256-bit vectors for AVX2 + or AVX (floating vectors only), provided the first permutation + can be performed in one insn. + 2014-10-03 David Malcolm <dmalcolm@redhat.com> * gcc.c (class driver): New class. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c4f97c0..2f36c52 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -43422,7 +43422,7 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify the permutation using the SSE4_1 pblendv instruction. Potentially - reduces permutaion from 2 pshufb and or to 1 pshufb and pblendv. */ + reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */ static bool expand_vec_perm_pblendv (struct expand_vec_perm_d *d) @@ -43432,11 +43432,14 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d) enum machine_mode vmode = d->vmode; bool ok; - /* Use the same checks as in expand_vec_perm_blend, but skipping - AVX and AVX2 as they require more than 2 instructions. */ + /* Use the same checks as in expand_vec_perm_blend. */ if (d->one_operand_p) return false; - if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) + if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) + ; + else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) + ; + else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) ; else return false; @@ -43458,7 +43461,7 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d) respective lanes and 8 >= 8, but 2 not. */ if (which != 1 && which != 2) return false; - if (d->testing_p) + if (d->testing_p && GET_MODE_SIZE (vmode) == 16) return true; /* First we apply one operand permutation to the part where @@ -43474,7 +43477,12 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d) dcopy.perm[i] = d->perm[i] & (nelt - 1); ok = expand_vec_perm_1 (&dcopy); - gcc_assert (ok); + if (GET_MODE_SIZE (vmode) != 16 && !ok) + return false; + else + gcc_assert (ok); + if (d->testing_p) + return true; /* Next we put permuted elements into their positions. */ dcopy1 = *d; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7f4de02..f8671a6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2014-10-03 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/61403 + * gcc.dg/torture/vshuf-32.inc: Add a new test 29. + 2014-10-03 Marek Polacek <polacek@redhat.com> * gcc.dg/gomp/appendix-a/a.35.4.c: Fix implicit declarations. diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-32.inc b/gcc/testsuite/gcc.dg/torture/vshuf-32.inc index e6425ce..ca975a2 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-32.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-32.inc @@ -28,7 +28,8 @@ T (24, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 4, 7, 10, 13, 16, 19, 22, 25, T (25, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42) \ T (26, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52) \ T (27, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53) \ -T (28, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 16, 17, 18, 19, 20, 21) +T (28, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 1, 2, 3, 4, 5, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 16, 17, 18, 19, 20, 21) \ +T (29, 0, 43, 2, 3, 57, 5, 6, 7, 8, 53, 40, 11, 12, 13, 42, 15, 16, 40, 18, 19, 20, 21, 22, 23, 24, 25, 36, 58, 36, 29, 30, 31) #define EXPTESTS \ T (116, 13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \ T (117, 39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \ |