diff options
author | Cui, Lili <lili.cui@intel.com> | 2024-11-26 15:10:23 +0800 |
---|---|---|
committer | Cui, Lili <lili.cui@intel.com> | 2024-11-26 15:14:43 +0800 |
commit | 60b708a9c878aff9a76ec0d446ae63e6527327a6 (patch) | |
tree | 245336e7de82c341c760062f24c0d52e5b19729b /gcc/config/i386 | |
parent | efb1d2e2368e60da3c691ee3cb510ee690d1fa2a (diff) | |
download | gcc-60b708a9c878aff9a76ec0d446ae63e6527327a6.zip gcc-60b708a9c878aff9a76ec0d446ae63e6527327a6.tar.gz gcc-60b708a9c878aff9a76ec0d446ae63e6527327a6.tar.bz2 |
Optimize 128-bit vector permutation with pand, pandn and por.
This patch introduces a new subroutine in ix86_expand_vec_perm_const_1.
On x86, use mixed constant permutation for V8HImode and V16QImode when
SSE2 is supported. This patch handles certain vector shuffle operations
more efficiently using pand, pandn, and por. This change is intended to
improve assembly code generation for configurations that support SSE2.
gcc/ChangeLog:
PR target/116675
* config/i386/i386-expand.cc (expand_vec_perm_pand_pandn_por):
New subroutine.
(ix86_expand_vec_perm_const_1): Call expand_vec_perm_pand_pandn_por.
gcc/testsuite/ChangeLog:
PR target/116675
* gcc.target/i386/pr116675.c: New test.
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index f8dcce4..2eb6197 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -23102,6 +23102,53 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) return true; } +/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement a + permutation (which is a blend) with and, andnot and or when pshufb is not available. + + It handles cases such as: + __builtin_shufflevector (v1, v2, 0, 9, 2, 11, 4, 13, 6, 15); + __builtin_shufflevector (v1, v2, 8, 1, 2, 11, 4, 13, 6, 15); + + Each dest[i] must be chosen between op0[i] and op1[i] to satisfy the + requirement. + + Return false if TARGET_SSE2 is not set, D has only one operand, the + mode is neither V16QImode nor V8HImode, or the permutation is not of + the form above. Otherwise return true; unless d->testing_p is set, + the blend is also emitted through ix86_expand_sse_movcc. + + NOTE(review): the code below rejects any permutation with perm[0] != 0, + which the second example above appears to violate; presumably the + operands are canonicalized before this point -- confirm. + */ + +static bool +expand_vec_perm_pand_pandn_por (struct expand_vec_perm_d *d) +{ + rtx rperm[16], vperm; + unsigned int i, nelt = d->nelt; + + if (!TARGET_SSE2 + || d->one_operand_p + || (d->vmode != V16QImode && d->vmode != V8HImode)) + return false; + + if (d->perm[0] != 0) + return false; + + /* Every remaining dest[i] must select either op0[i] or op1[i], i.e. + perm[i] must equal i modulo nelt. */ + for (i = 1; i < nelt; i++) + if ((d->perm[i] % nelt) != i) + return false; + + if (d->testing_p) + return true; + + /* Generate a blend mask for the AND and ANDNOT operations: all-ones + where perm[i] < nelt, zero otherwise. */ + machine_mode inner_mode = GET_MODE_INNER (d->vmode); + for (i = 0; i < nelt; i++) + rperm[i] = (d->perm[i] < nelt) ? CONSTM1_RTX (inner_mode) + : CONST0_RTX (inner_mode); + + vperm = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (nelt, rperm)); + vperm = force_reg (d->vmode, vperm); + + ix86_expand_sse_movcc (d->target, vperm, d->op0, d->op1); + + return true; +} + /* Implement permutation with pslldq + psrldq + por when pshufb is not available. */ static bool @@ -24161,6 +24208,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_psrlw_psllw_por (d)) return true; + if (expand_vec_perm_pand_pandn_por (d)) + return true; + /* Try sequences of four instructions. */ if (expand_vec_perm_even_odd_trunc (d)) |