aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2014-10-02 09:29:49 +0200
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2014-10-02 09:29:49 +0200
commit: 8a605c51cbb8f14cbd4d3bfd0d697924cb49b214 (patch)
tree: 246551f633d709c97d0fb8662459a1c8bec949d8 /gcc
parent: 5ae5a2386e7ddfeb79bb703c7d81282bd0a0d885 (diff)
download: gcc-8a605c51cbb8f14cbd4d3bfd0d697924cb49b214.zip
gcc-8a605c51cbb8f14cbd4d3bfd0d697924cb49b214.tar.gz
gcc-8a605c51cbb8f14cbd4d3bfd0d697924cb49b214.tar.bz2
re PR target/62128 (Use vpalignr for AVX2 rotation)
PR target/62128
* config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr
if it expands to a single insn only.
(expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument. If true,
fail unless in_order is true. Add forward declaration.
(expand_vec_perm_vperm2f128): Fix up comment about which permutation
is useful for one_operand_p.
(ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller.

From-SVN: r215796
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/i386/i386.c 23
2 files changed, 28 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8600040..24e007d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2014-10-02 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/62128
+ * config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr
+ if it expands to a single insn only.
+ (expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument. If true,
+ fail unless in_order is true. Add forward declaration.
+ (expand_vec_perm_vperm2f128): Fix up comment about which permutation
+ is useful for one_operand_p.
+ (ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller.
+
2014-10-01 Jan Hubicka <hubicka@ucw.cz>
* cgraphclones.c (build_function_type_skip_args): Do not make new
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8b70f6f..8fdc4141 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39636,6 +39636,7 @@ struct expand_vec_perm_d
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
@@ -43225,6 +43226,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_pshufb (d))
return true;
+ /* Try the AVX2 vpalignr instruction. */
+ if (expand_vec_perm_palignr (d, true))
+ return true;
+
/* Try the AVX512F vpermi2 instructions. */
rtx vec[64];
enum machine_mode mode = d->vmode;
@@ -43286,10 +43291,11 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
the permutation using the SSSE3 palignr instruction. This succeeds
when all of the elements in PERM fit within one vector and we merely
need to shift them down so that a single vector permutation has a
- chance to succeed. */
+ chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if the
+ vpalignr instruction by itself can perform the requested permutation. */
static bool
-expand_vec_perm_palignr (struct expand_vec_perm_d *d)
+expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{
unsigned i, nelt = d->nelt;
unsigned min, max;
@@ -43320,8 +43326,9 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
/* Given that we have SSSE3, we know we'll be able to implement the
single operand permutation after the palignr with pshufb for
- 128-bit vectors. */
- if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16)
+ 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
+ first. */
+ if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
return true;
dcopy = *d;
@@ -43342,6 +43349,9 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
}
dcopy.one_operand_p = true;
+ if (single_insn_only_p && !in_order)
+ return false;
+
/* For AVX2, test whether we can permute the result in one instruction. */
if (d->testing_p)
{
@@ -43922,7 +43932,8 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
return true;
}
- /* For one operand, the only useful vperm2f128 permutation is 0x10. */
+ /* For one operand, the only useful vperm2f128 permutation is 0x01
+ aka lanes swap. */
if (d->one_operand_p)
return false;
}
@@ -44811,7 +44822,7 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_pshuflw_pshufhw (d))
return true;
- if (expand_vec_perm_palignr (d))
+ if (expand_vec_perm_palignr (d, false))
return true;
if (expand_vec_perm_interleave2 (d))