aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgeny Stupachenko <evstupac@gmail.com>2014-06-10 12:40:16 +0000
committerKirill Yukhin <kyukhin@gcc.gnu.org>2014-06-10 12:40:16 +0000
commit6ba197c1d9422431246f910275aa6ac67471b581 (patch)
treeb6b70abe5403207c90e60614768f8f034736df19
parent5933f9ae4f8d7b9dec44ec95e70fcc543a616839 (diff)
downloadgcc-6ba197c1d9422431246f910275aa6ac67471b581.zip
gcc-6ba197c1d9422431246f910275aa6ac67471b581.tar.gz
gcc-6ba197c1d9422431246f910275aa6ac67471b581.tar.bz2
i386.c (expand_vec_perm_pblendv): New.
gcc/ * config/i386/i386.c (expand_vec_perm_pblendv): New. * config/i386/i386.c (ix86_expand_vec_perm_const_1): Use expand_vec_perm_pblendv. From-SVN: r211407
-rw-r--r--gcc/ChangeLog6
-rw-r--r--gcc/config/i386/i386.c77
2 files changed, 83 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bf68f34..3adb61b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2014-06-10 Evgeny Stupachenko <evstupac@gmail.com>
+
+ * config/i386/i386.c (expand_vec_perm_pblendv): New.
+ * config/i386/i386.c (ix86_expand_vec_perm_const_1): Use
+ expand_vec_perm_pblendv.
+
2014-06-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* doc/arm-acle-intrinsics.texi: Specify when CRC32 intrinsics are
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2b5fbf7..2050aaf0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -43201,6 +43201,80 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
return ok;
}
+/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement the two-operand permutation D as a one-operand permutation followed by
+ a blend (the SSE4_1 pblendv instruction). Potentially reduces the permutation from 2 pshufb and an or to 1 pshufb and pblendv.
+ Returns false if D is rejected by the checks below; returns true once D is accepted (when D->testing_p) or has been expanded. */
+
+static bool
+expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
+{
+ unsigned i, which, nelt = d->nelt;
+ struct expand_vec_perm_d dcopy, dcopy1;
+ enum machine_mode vmode = d->vmode;
+ bool ok;
+
+ /* Use the same checks as in expand_vec_perm_blend, but skipping
+ AVX2 as it requires more than 2 instructions in the general case. */
+ if (d->one_operand_p)
+ return false;
+ if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
+ ;
+ else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
+ ;
+ else
+ return false;
+
+ /* Figure out which permutation elements are not in their respective lanes (perm[i] != i). WHICH gets bit value 1
+ if any such element has an index < nelt, and bit value 2 if any such element has an index >= nelt. */
+ for (i = 0, which = 0; i < nelt; ++i)
+ {
+ unsigned e = d->perm[i];
+ if (e != i)
+ which |= (e < nelt ? 1 : 2);
+ }
+ /* We can pblend the part where elements are not in their
+ respective lanes only when these elements are all in one
+ half of the permutation index range (WHICH is exactly 1 or 2).
+ {0 1 8 3 4 5 9 7} is ok as 8, 9 are not at their respective
+ lanes, and both 8 and 9 >= 8
+ {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
+ respective lanes and 8 >= 8, but 2 is not. */
+ if (which != 1 && which != 2)
+ return false;
+ if (d->testing_p)
+ return true;
+
+ /* First we apply a one-operand permutation to the operand that supplies the elements which are not in their
+ respective lanes (op1 when WHICH is 2, op0 otherwise); the indices are masked with nelt - 1 for that. */
+ dcopy = *d;
+ if (which == 2)
+ dcopy.op0 = dcopy.op1 = d->op1;
+ else
+ dcopy.op0 = dcopy.op1 = d->op0;
+ dcopy.one_operand_p = true;
+
+ for (i = 0; i < nelt; ++i)
+ dcopy.perm[i] = d->perm[i] & (nelt - 1);
+
+ ok = expand_vec_perm_1 (&dcopy);
+ gcc_assert (ok);
+
+ /* Next we put permuted elements into their positions: the permuted vector replaces the operand it was built from, then element i is blended from position i of op0 or op1. */
+ dcopy1 = *d;
+ if (which == 2)
+ dcopy1.op1 = dcopy.target;
+ else
+ dcopy1.op0 = dcopy.target;
+
+ for (i = 0; i < nelt; ++i)
+ dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
+
+ ok = expand_vec_perm_blend (&dcopy1);
+ gcc_assert (ok);
+
+ return true;
+}
+
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
@@ -44573,6 +44647,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_vperm2f128 (d))
return true;
+ if (expand_vec_perm_pblendv (d))
+ return true;
+
/* Try sequences of three instructions. */
if (expand_vec_perm_2vperm2f128_vshuf (d))