aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2022-09-21 14:56:08 +0800
committerliuhongt <hongtao.liu@intel.com>2022-09-26 11:20:46 +0800
commit3db8e9c2422d924a958336fd0871b24cce3e65d1 (patch)
tree65ef5d298143750d8328fe627a80ca4d932f28df /gcc/config
parentde613c6295ea50d75167eaf89f41074a69298108 (diff)
downloadgcc-3db8e9c2422d924a958336fd0871b24cce3e65d1.zip
gcc-3db8e9c2422d924a958336fd0871b24cce3e65d1.tar.gz
gcc-3db8e9c2422d924a958336fd0871b24cce3e65d1.tar.bz2
Support 2-instruction vector shuffle for V4SI/V4SF in ix86_expand_vec_perm_const_1.
2022-09-23  Hongtao Liu  <hongtao.liu@intel.com>
            Liwei Xu  <liwei.xu@intel.com>

gcc/ChangeLog:

        PR target/53346
        * config/i386/i386-expand.cc (expand_vec_perm_shufps_shufps): New function.
        (ix86_expand_vec_perm_const_1): Insert expand_vec_perm_shufps_shufps
        at the end of 2-instruction expand sequence.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr53346-1.c: New test.
        * gcc.target/i386/pr53346-2.c: New test.
        * gcc.target/i386/pr53346-3.c: New test.
        * gcc.target/i386/pr53346-4.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/i386-expand.cc116
1 files changed, 116 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5334363..6baff6d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19604,6 +19604,119 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
return false;
}
+/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
+ in terms of a pair of shufps + shufps/pshufd instructions.
+
+ Only two-operand permutations in V4SImode or V4SFmode are handled;
+ for anything else the function returns false. When D->testing_p is
+ set it returns true before creating any temporary or calling any
+ expander. Otherwise it runs two expander calls, the second of which
+ is given D->target as its destination, and returns true.
+
+ Note that the "3 from one op and 1 from another" path may exchange
+ D->op0 and D->op1 in place, so D is modified on that path. */
+static bool
+expand_vec_perm_shufps_shufps (struct expand_vec_perm_d *d)
+{
+ unsigned char perm1[4];
+ machine_mode vmode = d->vmode;
+ bool ok;
+ unsigned i, j, k, count = 0;
+
+ if (d->one_operand_p
+ || (vmode != V4SImode && vmode != V4SFmode))
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ for (i = 0; i < 4; ++i)
+ count += d->perm[i] > 3 ? 1 : 0; /* COUNT = number of selector entries above 3. */
+
+ gcc_assert (count & 3); /* COUNT must be 1, 2 or 3; 0 and 4 would trip this. */
+
+ rtx tmp = gen_reg_rtx (vmode);
+ /* 2 from op0 and 2 from op1. PERM1 collects the entries without
+ bit 2 set into slots 0-1 and the entries with bit 2 set into slots
+ 2-3, keeping their relative order. PERM2 records, for each result
+ position I, the slot of PERM1 that received D->perm[I]. */
+ if (count == 2)
+ {
+ unsigned char perm2[4];
+ for (i = 0, j = 0, k = 2; i < 4; ++i)
+ if (d->perm[i] & 4)
+ {
+ perm1[k++] = d->perm[i];
+ perm2[i] = k - 1;
+ }
+ else
+ {
+ perm1[j++] = d->perm[i];
+ perm2[i] = j - 1;
+ }
+
+ /* shufps: apply PERM1 to op0/op1, result in TMP. */
+ ok = expand_vselect_vconcat (tmp, d->op0, d->op1,
+ perm1, d->nelt, false);
+ gcc_assert (ok);
+ if (vmode == V4SImode && TARGET_SSE2)
+ /* pshufd: single-input shuffle of TMP by PERM2. */
+ ok = expand_vselect (d->target, tmp,
+ perm2, d->nelt, false);
+ else
+ {
+ /* shufps: TMP is passed as both inputs, so the last two selector
+ entries are biased by 4 before the call. */
+ perm2[2] += 4;
+ perm2[3] += 4;
+ ok = expand_vselect_vconcat (d->target, tmp, tmp,
+ perm2, d->nelt, false);
+ }
+ gcc_assert (ok);
+ }
+ /* 3 from one op and 1 from another. */
+ else
+ {
+ unsigned pair_idx = 8, lone_idx = 8, shift;
+
+ /* Find the lone index, i.e. the result position whose element is
+ the only one taken from its operand. */
+ for (i = 0; i < 4; ++i)
+ if ((d->perm[i] > 3 && count == 1)
+ || (d->perm[i] < 4 && count == 3))
+ lone_idx = i;
+
+ /* When lone_idx is not 0, the lone element must come from the
+ second op (count == 1); when it is 0, count must be 3. */
+ gcc_assert (count == (lone_idx ? 1 : 3));
+
+ /* Find the pair index that sits in the same half as the lone index.
+ SHIFT is 0 or 2, the first position of that half, and PAIR_IDX is
+ the other position in it (0<->1, 2<->3). */
+ shift = lone_idx & 2;
+ pair_idx = 1 - lone_idx + 2 * shift;
+
+ /* First permute the lone element and the pair element into the same
+ vector as [ lone, lone, pair, pair ]. */
+ perm1[1] = perm1[0]
+ = (count == 3) ? d->perm[lone_idx] : d->perm[lone_idx] - 4;
+ perm1[3] = perm1[2]
+ = (count == 3) ? d->perm[pair_idx] : d->perm[pair_idx] + 4;
+
+ /* Always put the vector that contains the lone element first. */
+ if (count == 1)
+ std::swap (d->op0, d->op1);
+
+ /* shufps: build [ lone, lone, pair, pair ] in TMP. */
+ ok = expand_vselect_vconcat (tmp, d->op0, d->op1,
+ perm1, d->nelt, false);
+ gcc_assert (ok);
+
+ /* Restore the lone and pair elements to their original order. PERM1
+ is reused as the selector of the second shuffle; slots SHIFT and
+ SHIFT + 1 are set from LONE_IDX and PAIR_IDX. */
+ perm1[shift] = lone_idx << 1;
+ perm1[shift + 1] = pair_idx << 1;
+
+ /* Select the remaining 2 elements (the other half of the result)
+ from the other vector. */
+ for (i = 2 - shift; i < 4 - shift; ++i)
+ perm1[i] = lone_idx == 1 ? d->perm[i] + 4 : d->perm[i];
+
+ /* Adjust to the original selector: when the lone/pair half is the
+ upper half, exchange TMP and D->op1 so that the vector built above
+ is passed as the second input of the final shuffle. */
+ if (lone_idx > 1)
+ std::swap (tmp, d->op1);
+
+ /* shufps: produce the final result in D->target. */
+ ok = expand_vselect_vconcat (d->target, tmp, d->op1,
+ perm1, d->nelt, false);
+
+ gcc_assert (ok);
+ }
+
+ return true;
+}
+
/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
in terms of a pair of pshuflw + pshufhw instructions. */
@@ -22152,6 +22265,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_2perm_pblendv (d, true))
return true;
+ if (expand_vec_perm_shufps_shufps (d))
+ return true;
+
/* Try sequences of three instructions. */
if (expand_vec_perm_even_odd_pack (d))