diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-12-20 11:54:43 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2024-05-07 15:44:35 +0800 |
commit | a9f642783853b60bb0a59562b8ab3ed10ec01641 (patch) | |
tree | f0feae61a361b0161acbbaa6d551a787f170da53 /gcc/config | |
parent | 0822400aae8f79de8f10ddde268f592ba6c2d2fb (diff) | |
download | gcc-a9f642783853b60bb0a59562b8ab3ed10ec01641.zip gcc-a9f642783853b60bb0a59562b8ab3ed10ec01641.tar.gz gcc-a9f642783853b60bb0a59562b8ab3ed10ec01641.tar.bz2 |
Optimize 64-bit vector permutation with punpcklqdq + 128-bit vector pshuf.
gcc/ChangeLog:
PR target/113090
* config/i386/i386-expand.cc
(expand_vec_perm_punpckldq_pshuf): New function.
(ix86_expand_vec_perm_const_1): Try
expand_vec_perm_punpckldq_pshuf for sequence of 2
instructions.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr113090.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a613291..2f27bfb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -21173,6 +21173,74 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Try to permute 2 64-bit vectors by punpcklqdq + 128-bit vector shuffle.  */
+static bool
+expand_vec_perm_punpckldq_pshuf (struct expand_vec_perm_d *d)
+{
+  if (GET_MODE_BITSIZE (d->vmode) != 64
+      || !TARGET_MMX_WITH_SSE
+      || d->one_operand_p)
+    return false;
+
+  machine_mode widen_vmode;
+  switch (d->vmode)
+    {
+    /* V2SI is widened to V4SI and shuffled with pshufd.  */
+    case E_V2SImode:
+      widen_vmode = V4SImode;
+      break;
+
+    /* V2SF is widened to V4SF and shuffled with pshufd.  */
+    case E_V2SFmode:
+      widen_vmode = V4SFmode;
+      break;
+
+    case E_V4HImode:
+      widen_vmode = V8HImode;
+      /* The V8HI shuffle uses pshufb, so give up without SSSE3.  */
+      if (!TARGET_SSSE3)
+        return false;
+      break;
+
+    case E_V8QImode:
+      /* The V16QI shuffle uses pshufb, so give up without SSSE3.  */
+      widen_vmode = V16QImode;
+      if (!TARGET_SSSE3)
+        return false;
+      break;
+
+    default:
+      return false;
+    }
+
+  if (d->testing_p)
+    return true;
+
+  struct expand_vec_perm_d dperm;
+  dperm.target = gen_reg_rtx (widen_vmode);
+  rtx op0 = gen_reg_rtx (widen_vmode);
+  emit_move_insn (op0, gen_rtx_VEC_CONCAT (widen_vmode, d->op0, d->op1));
+  dperm.op0 = op0;
+  dperm.op1 = op0;
+  dperm.vmode = widen_vmode;
+  unsigned nelt = GET_MODE_NUNITS (widen_vmode);
+  dperm.nelt = nelt;
+  dperm.one_operand_p = true;
+  dperm.testing_p = false;
+
+  for (unsigned i = 0; i != nelt / 2; i++)
+    {
+      dperm.perm[i] = d->perm[i];
+      dperm.perm[i + nelt / 2] = d->perm[i];
+    }
+
+  gcc_assert (expand_vec_perm_1 (&dperm)); /* NOTE(review): dperm.target is used below; confirm gcc_assert always evaluates this call.  */
+  emit_move_insn (d->target, lowpart_subreg (d->vmode,
+                                             dperm.target,
+                                             dperm.vmode));
+  return true;
+}
+
 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
    the permutation using the SSSE3 palignr instruction.  This succeeds
    when all of the elements in PERM fit within one vector and we merely
@@ -23685,6 +23753,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_shufps_shufps (d))
     return true;
 
+  if (expand_vec_perm_punpckldq_pshuf (d))
+    return true;
+
   /* Try sequences of three instructions.  */
 
   if (expand_vec_perm_even_odd_pack (d)) |