aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Marc Glisse <marc.glisse@inria.fr> 2012-05-14 22:19:30 +0200
committer: Marc Glisse <glisse@gcc.gnu.org> 2012-05-14 20:19:30 +0000
commit: 6015a67d39faf6fe3e0d35629f9ac9e02d4f54e5 (patch)
tree: 08232444cf180fcf4bb31c04ffb7ce0a55bc482f /gcc
parent: 80b91c0b39e7d65e3d89e49b5bf2ec40927f5a33 (diff)
download: gcc-6015a67d39faf6fe3e0d35629f9ac9e02d4f54e5.zip
gcc-6015a67d39faf6fe3e0d35629f9ac9e02d4f54e5.tar.gz
gcc-6015a67d39faf6fe3e0d35629f9ac9e02d4f54e5.tar.bz2
re PR target/52607 (v4df __builtin_shuffle with {0,2,1,3} or {1,3,0,2})
2012-05-14  Marc Glisse  <marc.glisse@inria.fr>

 PR target/52607
 * config/i386/i386.c (ix86_expand_vec_perm_const): Move code to ...
 (canonicalize_perm): ... new function.
 (expand_vec_perm_2vperm2f128_vshuf): New function.
 (ix86_expand_vec_perm_const_1): Call it.

From-SVN: r187479
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/config/i386/i386.c 136
2 files changed, 111 insertions, 33 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index da88c52..dc9b9a1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2012-05-14 Marc Glisse <marc.glisse@inria.fr>
+
+ PR target/52607
+ * config/i386/i386.c (ix86_expand_vec_perm_const): Move code to ...
+ (canonicalize_perm): ... new function.
+ (expand_vec_perm_2vperm2f128_vshuf): New function.
+ (ix86_expand_vec_perm_const_1): Call it.
+
2012-05-14 Andrew Pinski <apinski@cavium.com>
H.J. Lu <hongjiu.lu@intel.com>
Jakub Jelinek <jakub@redhat.com>
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ad4739b..6cc64fb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -33343,6 +33343,7 @@ struct expand_vec_perm_d
bool testing_p;
};
+static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
@@ -37400,6 +37401,57 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
return true;
}
+/* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
+ permutation using two vperm2f128, followed by a vshufpd insn blending
+ the two vectors together. Returns false unless AVX and V4DFmode. */
+
+static bool
+expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
+{
+ struct expand_vec_perm_d dfirst, dsecond, dthird;
+ bool ok;
+
+ if (!TARGET_AVX || (d->vmode != V4DFmode))
+ return false;
+
+ if (d->testing_p)
+ return true; /* Probe only: succeed before creating registers or expanding. */
+
+ dfirst = *d;
+ dsecond = *d;
+ dthird = *d;
+
+ dfirst.perm[0] = (d->perm[0] & ~1); /* Elements 0-1: the aligned pair containing d->perm[0]. */
+ dfirst.perm[1] = (d->perm[0] & ~1) + 1;
+ dfirst.perm[2] = (d->perm[2] & ~1); /* Elements 2-3: the aligned pair containing d->perm[2]. */
+ dfirst.perm[3] = (d->perm[2] & ~1) + 1;
+ dsecond.perm[0] = (d->perm[1] & ~1); /* Elements 0-1: the aligned pair containing d->perm[1]. */
+ dsecond.perm[1] = (d->perm[1] & ~1) + 1;
+ dsecond.perm[2] = (d->perm[3] & ~1); /* Elements 2-3: the aligned pair containing d->perm[3]. */
+ dsecond.perm[3] = (d->perm[3] & ~1) + 1;
+ dthird.perm[0] = (d->perm[0] % 2); /* % 2 is the position inside the pair; 0-3 index op0. */
+ dthird.perm[1] = (d->perm[1] % 2) + 4; /* 4-7 index op1 (assumes nelt is 4 for V4DFmode). */
+ dthird.perm[2] = (d->perm[2] % 2) + 2; /* Second pair of op0. */
+ dthird.perm[3] = (d->perm[3] % 2) + 6; /* Second pair of op1. */
+
+ dfirst.target = gen_reg_rtx (dfirst.vmode);
+ dsecond.target = gen_reg_rtx (dsecond.vmode);
+ dthird.op0 = dfirst.target; /* The two intermediate results feed the final step. */
+ dthird.op1 = dsecond.target;
+ dthird.one_operand_p = false; /* dthird reads two distinct targets. */
+
+ canonicalize_perm (&dfirst); /* Return value unused; sets one_operand_p and folds operands. */
+ canonicalize_perm (&dsecond);
+
+ ok = expand_vec_perm_1 (&dfirst) /* && stops at the first step that fails. */
+ && expand_vec_perm_1 (&dsecond)
+ && expand_vec_perm_1 (&dthird);
+
+ gcc_assert (ok); /* The testing_p path above promised success unconditionally. */
+
+ return true;
+}
+
/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
permutation with two pshufb insns and an ior. We should have already
failed all two instruction sequences. */
@@ -38049,6 +38101,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Try sequences of three instructions. */
+ if (expand_vec_perm_2vperm2f128_vshuf (d))
+ return true;
+
if (expand_vec_perm_pshufb2 (d))
return true;
@@ -38086,12 +38141,56 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return false;
}
+/* If permutation D only uses one operand, set D->one_operand_p and make
+ op0 and op1 the same rtx. Returns true if the indices reference both. */
+
+static bool
+canonicalize_perm (struct expand_vec_perm_d *d)
+{
+ int i, which, nelt = d->nelt;
+
+ for (i = which = 0; i < nelt; ++i)
+ which |= (d->perm[i] < nelt ? 1 : 2); /* Bit 0: an index selects op0; bit 1: an index selects op1. */
+
+ d->one_operand_p = true; /* Cleared again below only for two distinct operands. */
+ switch (which)
+ {
+ default:
+ gcc_unreachable(); /* which == 0, i.e. the loop saw no elements. */
+
+ case 3:
+ if (!rtx_equal_p (d->op0, d->op1))
+ {
+ d->one_operand_p = false;
+ break;
+ }
+ /* The elements of PERM do not suggest that only the first operand
+ is used, but both operands are identical. Allow easier matching
+ of the permutation by folding the permutation into the single
+ input vector. */
+ /* FALLTHRU */
+
+ case 2:
+ for (i = 0; i < nelt; ++i)
+ d->perm[i] &= nelt - 1; /* Map indices into 0..nelt-1; assumes nelt is a power of two. */
+ d->op0 = d->op1;
+ break;
+
+ case 1:
+ d->op1 = d->op0; /* Indices already select only op0; perm is left untouched. */
+ break;
+ }
+
+ return (which == 3); /* True even when equal operands were folded above. */
+}
+
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
- int i, nelt, which;
+ int i, nelt;
+ bool two_args;
rtx sel;
d.target = operands[0];
@@ -38108,44 +38207,15 @@ ix86_expand_vec_perm_const (rtx operands[4])
gcc_assert (XVECLEN (sel, 0) == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
- for (i = which = 0; i < nelt; ++i)
+ for (i = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
int ei = INTVAL (e) & (2 * nelt - 1);
-
- which |= (ei < nelt ? 1 : 2);
d.perm[i] = ei;
perm[i] = ei;
}
- d.one_operand_p = true;
- switch (which)
- {
- default:
- gcc_unreachable();
-
- case 3:
- if (!rtx_equal_p (d.op0, d.op1))
- {
- d.one_operand_p = false;
- break;
- }
- /* The elements of PERM do not suggest that only the first operand
- is used, but both operands are identical. Allow easier matching
- of the permutation by folding the permutation into the single
- input vector. */
- /* FALLTHRU */
-
- case 2:
- for (i = 0; i < nelt; ++i)
- d.perm[i] &= nelt - 1;
- d.op0 = d.op1;
- break;
-
- case 1:
- d.op1 = d.op0;
- break;
- }
+ two_args = canonicalize_perm (&d);
if (ix86_expand_vec_perm_const_1 (&d))
return true;
@@ -38154,7 +38224,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
same, the above tried to expand with one_operand_p and flattened selector.
If that didn't work, retry without one_operand_p; we succeeded with that
during testing. */
- if (which == 3 && d.one_operand_p)
+ if (two_args && d.one_operand_p)
{
d.one_operand_p = false;
memcpy (d.perm, perm, sizeof (perm));