diff options
author | Jakub Jelinek <jakub@redhat.com> | 2015-12-04 09:25:24 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2015-12-04 09:25:24 +0100 |
commit | e315eea9d086ffef705ae96a04ef1b15cfec7745 (patch) | |
tree | 66a206f2410fa1fee6325a6afb029b5952cbd15a /gcc | |
parent | 5505978ab31300aa02f321f97af9512916ed8595 (diff) | |
download | gcc-e315eea9d086ffef705ae96a04ef1b15cfec7745.zip gcc-e315eea9d086ffef705ae96a04ef1b15cfec7745.tar.gz gcc-e315eea9d086ffef705ae96a04ef1b15cfec7745.tar.bz2 |
re PR target/68655 (SSE2 cannot vec_perm of low and high part)
PR target/68655
* config/i386/i386.c (canonicalize_vector_int_perm): New function.
(expand_vec_perm_1): Use it and recurse if everything else
failed. Use nd.perm instead of perm2.
(expand_vec_perm_even_odd_1): If testing_p, use gen_raw_REG
instead of gen_lowpart for the target.
(ix86_expand_vec_perm_const_1): Use canonicalize_vector_int_perm
and recurse if everything else failed.
* gcc.dg/torture/vshuf-4.inc (TESTS): Add one extra test.
* gcc.dg/torture/vshuf-4.inc (TESTS): Add two extra tests.
From-SVN: r231247
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 103 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/torture/vshuf-4.inc | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/torture/vshuf-8.inc | 4 |
5 files changed, 108 insertions, 19 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fc026c5..7394e28 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2015-12-04 Jakub Jelinek <jakub@redhat.com> + + PR target/68655 + * config/i386/i386.c (canonicalize_vector_int_perm): New function. + (expand_vec_perm_1): Use it and recurse if everything else + failed. Use nd.perm instead of perm2. + (expand_vec_perm_even_odd_1): If testing_p, use gen_raw_REG + instead of gen_lowpart for the target. + (ix86_expand_vec_perm_const_1): Use canonicalize_vector_int_perm + and recurse if everything else failed. + 2015-12-04 Richard Biener <rguenther@suse.de> PR middle-end/68636 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index cd44375..4247af3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -49336,6 +49336,57 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) return true; } +/* For V*[QHS]Imode permutations, check if the same permutation + can't be performed in a 2x, 4x or 8x wider inner mode. */ + +static bool +canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, + struct expand_vec_perm_d *nd) +{ + int i; + enum machine_mode mode = VOIDmode; + + switch (d->vmode) + { + case V16QImode: mode = V8HImode; break; + case V32QImode: mode = V16HImode; break; + case V64QImode: mode = V32HImode; break; + case V8HImode: mode = V4SImode; break; + case V16HImode: mode = V8SImode; break; + case V32HImode: mode = V16SImode; break; + case V4SImode: mode = V2DImode; break; + case V8SImode: mode = V4DImode; break; + case V16SImode: mode = V8DImode; break; + default: return false; + } + for (i = 0; i < d->nelt; i += 2) + if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) + return false; + nd->vmode = mode; + nd->nelt = d->nelt / 2; + for (i = 0; i < nd->nelt; i++) + nd->perm[i] = d->perm[2 * i] / 2; + if (GET_MODE_INNER (mode) != DImode) + canonicalize_vector_int_perm (nd, nd); + if (nd != d) + { + nd->one_operand_p = d->one_operand_p; + nd->testing_p = d->testing_p; + if (d->op0 == d->op1) + nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); + else + { + nd->op0 = gen_lowpart (nd->vmode, d->op0); + nd->op1 = gen_lowpart (nd->vmode, d->op1); + } + if (d->testing_p) + nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); + else + nd->target = gen_reg_rtx (nd->vmode); + } + return true; +} + /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D in a single instruction. */ @@ -49343,7 +49394,7 @@ static bool expand_vec_perm_1 (struct expand_vec_perm_d *d) { unsigned i, nelt = d->nelt; - unsigned char perm2[MAX_VECT_LEN]; + struct expand_vec_perm_d nd; /* Check plain VEC_SELECT first, because AVX has instructions that could match both SEL and SEL+CONCAT, but the plain SEL will allow a memory @@ -49356,10 +49407,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) for (i = 0; i < nelt; i++) { - perm2[i] = d->perm[i] & mask; - if (perm2[i] != i) + nd.perm[i] = d->perm[i] & mask; + if (nd.perm[i] != i) identity_perm = false; - if (perm2[i]) + if (nd.perm[i]) broadcast_perm = false; } @@ -49428,7 +49479,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) } } - if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p)) + if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) return true; /* There are plenty of patterns in sse.md that are written for @@ -49439,10 +49490,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) every other permutation operand. */ for (i = 0; i < nelt; i += 2) { - perm2[i] = d->perm[i] & mask; - perm2[i + 1] = (d->perm[i + 1] & mask) + nelt; + nd.perm[i] = d->perm[i] & mask; + nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; } - if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt, + if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, d->testing_p)) return true; @@ -49451,13 +49502,13 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) { for (i = 0; i < nelt; i += 4) { - perm2[i + 0] = d->perm[i + 0] & mask; - perm2[i + 1] = d->perm[i + 1] & mask; - perm2[i + 2] = (d->perm[i + 2] & mask) + nelt; - perm2[i + 3] = (d->perm[i + 3] & mask) + nelt; + nd.perm[i + 0] = d->perm[i + 0] & mask; + nd.perm[i + 1] = d->perm[i + 1] & mask; + nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; + nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; } - if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt, + if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, d->testing_p)) return true; } @@ -49478,10 +49529,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) e -= nelt; else e += nelt; - perm2[i] = e; + nd.perm[i] = e; } - if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt, + if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, d->testing_p)) return true; } @@ -49507,6 +49558,14 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) return true; + /* See if we can get the same permutation in different vector integer + mode. */ + if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) + { + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); + return true; + } return false; } @@ -50939,7 +50998,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) struct expand_vec_perm_d d_copy = *d; d_copy.vmode = V4DFmode; if (d->testing_p) - d_copy.target = gen_lowpart (V4DFmode, d->target); + d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); else d_copy.target = gen_reg_rtx (V4DFmode); d_copy.op0 = gen_lowpart (V4DFmode, d->op0); @@ -50978,7 +51037,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) struct expand_vec_perm_d d_copy = *d; d_copy.vmode = V8SFmode; if (d->testing_p) - d_copy.target = gen_lowpart (V8SFmode, d->target); + d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); else d_copy.target = gen_reg_rtx (V8SFmode); d_copy.op0 = gen_lowpart (V8SFmode, d->op0); @@ -51422,6 +51481,16 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_vpshufb4_vpermq2 (d)) return true; + /* See if we can get the same permutation in different vector integer + mode. */ + struct expand_vec_perm_d nd; + if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) + { + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); + return true; + } + return false; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9cbd390..2e93d1a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-12-04 Jakub Jelinek <jakub@redhat.com> + + PR target/68655 + * gcc.dg/torture/vshuf-4.inc (TESTS): Add one extra test. + * gcc.dg/torture/vshuf-4.inc (TESTS): Add two extra tests. + 2015-12-03 Bernd Schmidt <bschmidt@redhat.com> PR target/68472 diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-4.inc b/gcc/testsuite/gcc.dg/torture/vshuf-4.inc index d0cb738..d041b33 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-4.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-4.inc @@ -24,7 +24,8 @@ T (20, 0, 4, 1, 5) \ T (21, 2, 6, 3, 7) \ T (22, 1, 2, 3, 0) \ T (23, 2, 1, 0, 3) \ -T (24, 2, 5, 6, 3) +T (24, 2, 5, 6, 3) \ +T (25, 0, 1, 4, 5) #define EXPTESTS \ T (116, 1, 2, 4, 3) \ T (117, 7, 3, 3, 0) \ diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-8.inc b/gcc/testsuite/gcc.dg/torture/vshuf-8.inc index a39d71d..d7a7610 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-8.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-8.inc @@ -23,7 +23,9 @@ T (19, 7, 6, 5, 4, 3, 2, 1, 0) \ T (20, 0, 8, 1, 9, 2, 10, 3, 11) \ T (21, 4, 12, 5, 13, 6, 14, 7, 15) \ T (22, 1, 2, 3, 4, 5, 6, 7, 0) \ -T (23, 6, 5, 4, 3, 2, 1, 0, 7) +T (23, 6, 5, 4, 3, 2, 1, 0, 7) \ +T (24, 0, 1, 2, 3, 8, 9, 10, 11) \ +T (25, 0, 1, 2, 3, 12, 13, 14, 15) #define EXPTESTS \ T (116, 9, 3, 9, 4, 7, 0, 0, 6) \ T (117, 4, 14, 12, 8, 9, 6, 0, 10) \ |