aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2023-12-20 11:54:43 +0800
committerliuhongt <hongtao.liu@intel.com>2024-05-07 15:44:35 +0800
commita9f642783853b60bb0a59562b8ab3ed10ec01641 (patch)
treef0feae61a361b0161acbbaa6d551a787f170da53 /gcc/config
parent0822400aae8f79de8f10ddde268f592ba6c2d2fb (diff)
downloadgcc-a9f642783853b60bb0a59562b8ab3ed10ec01641.zip
gcc-a9f642783853b60bb0a59562b8ab3ed10ec01641.tar.gz
gcc-a9f642783853b60bb0a59562b8ab3ed10ec01641.tar.bz2
Optimize 64-bit vector permutation with punpcklqdq + 128-bit vector pshuf.
gcc/ChangeLog:

	PR target/113090
	* config/i386/i386-expand.cc (expand_vec_perm_punpckldq_pshuf):
	New function.
	(ix86_expand_vec_perm_const_1): Try
	expand_vec_perm_punpckldq_pshuf for sequence of 2 instructions.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr113090.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/i386-expand.cc71
1 files changed, 71 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a613291..2f27bfb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -21173,6 +21173,74 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
return true;
}
+/* A subroutine of ix86_expand_vec_perm_const_1.  Try to implement a
+   two-operand permutation of 64-bit vectors by concatenating both
+   operands into a single 128-bit vector (punpcklqdq) and then doing a
+   one-operand shuffle of that wider vector (pshufd or pshufb).
+
+   Return true if the strategy applies; unless D->testing_p is set the
+   insns have then been emitted and D->target holds the result.  Return
+   false, without emitting anything, if the strategy does not apply.  */
+static bool
+expand_vec_perm_punpckldq_pshuf (struct expand_vec_perm_d *d)
+{
+  /* Only two-operand permutations of 64-bit vectors under
+     TARGET_MMX_WITH_SSE are candidates.  */
+  if (GET_MODE_BITSIZE (d->vmode) != 64
+      || !TARGET_MMX_WITH_SSE
+      || d->one_operand_p)
+    return false;
+
+  /* Pick the 128-bit mode with the same element type and twice as many
+     elements, and check that the shuffle for it is available.  */
+  machine_mode widen_vmode;
+  switch (d->vmode)
+    {
+    /* pshufd.  */
+    case E_V2SImode:
+      widen_vmode = V4SImode;
+      break;
+
+    /* pshufd.  */
+    case E_V2SFmode:
+      widen_vmode = V4SFmode;
+      break;
+
+    case E_V4HImode:
+      widen_vmode = V8HImode;
+      /* pshufb.  */
+      if (!TARGET_SSSE3)
+	return false;
+      break;
+
+    case E_V8QImode:
+      /* pshufb.  */
+      widen_vmode = V16QImode;
+      if (!TARGET_SSSE3)
+	return false;
+      break;
+
+    default:
+      return false;
+    }
+
+  /* Every mode accepted above is treated as expandable, so when only
+     testing there is nothing to emit.  */
+  if (d->testing_p)
+    return true;
+
+  /* Build the one-operand permutation on the concatenated vector.  */
+  struct expand_vec_perm_d dperm;
+  dperm.target = gen_reg_rtx (widen_vmode);
+  rtx op0 = gen_reg_rtx (widen_vmode);
+  emit_move_insn (op0, gen_rtx_VEC_CONCAT (widen_vmode, d->op0, d->op1));
+  dperm.op0 = op0;
+  dperm.op1 = op0;
+  dperm.vmode = widen_vmode;
+  unsigned nelt = GET_MODE_NUNITS (widen_vmode);
+  dperm.nelt = nelt;
+  dperm.one_operand_p = true;
+  dperm.testing_p = false;
+
+  /* The original selector indices are reused unchanged as indices into
+     the concatenated vector.  Only the low half of the wide result is
+     copied out below, so the high half of the selector simply repeats
+     the low half.  */
+  for (unsigned i = 0; i != nelt / 2; i++)
+    {
+      dperm.perm[i] = d->perm[i];
+      dperm.perm[i + nelt / 2] = d->perm[i];
+    }
+
+  /* Keep the call outside gcc_assert so the shuffle is emitted no matter
+     how the assertion macro is configured.  */
+  bool ok = expand_vec_perm_1 (&dperm);
+  gcc_assert (ok);
+
+  /* The requested 64-bit result is the low part of the wide shuffle.  */
+  emit_move_insn (d->target, lowpart_subreg (d->vmode,
+					     dperm.target,
+					     dperm.vmode));
+  return true;
+}
+
/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
the permutation using the SSSE3 palignr instruction. This succeeds
when all of the elements in PERM fit within one vector and we merely
@@ -23685,6 +23753,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_shufps_shufps (d))
return true;
+ if (expand_vec_perm_punpckldq_pshuf (d))
+ return true;
+
/* Try sequences of three instructions. */
if (expand_vec_perm_even_odd_pack (d))