about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2022-01-05 20:06:03 +0100
committer: Uros Bizjak <ubizjak@gmail.com> 2022-01-05 20:07:22 +0100
commit: 877c9e332f9b2b6eacd6ed4edf5790ee0f41a68f (patch)
tree: d7726b9369ae020e8c36961056586715aaab7e8a /gcc
parent: 85a3442c85aedb00c59e986f16cccbb8ec60d777 (diff)
download: gcc-877c9e332f9b2b6eacd6ed4edf5790ee0f41a68f.zip
          gcc-877c9e332f9b2b6eacd6ed4edf5790ee0f41a68f.tar.gz
          gcc-877c9e332f9b2b6eacd6ed4edf5790ee0f41a68f.tar.bz2
i386: Fix expand_vec_perm_pshufb for narrow modes [PR103905]
2022-01-05  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	PR target/103905
	* config/i386/i386-expand.c (expand_vec_perm_pshufb): Fix number of
	narrow mode remapped elements for !one_operand_p case.

gcc/testsuite/ChangeLog:

	PR target/103905
	* gcc.target/i386/pr103905.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/i386/i386-expand.c             | 23
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103905.c  | 25
2 files changed, 37 insertions, 11 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index e93ef1c..9bd8e53 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -18730,7 +18730,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
unsigned i, nelt, eltsz, mask;
unsigned char perm[64];
- machine_mode vmode = V16QImode;
+ machine_mode vmode;
struct expand_vec_perm_d nd;
rtx rperm[64], vperm, target, op0, op1;
@@ -18754,6 +18754,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
case 16:
if (!TARGET_XOP)
return false;
+ vmode = V16QImode;
break;
case 32:
@@ -18803,6 +18804,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
case 16:
if (!TARGET_SSSE3)
return false;
+ vmode = V16QImode;
break;
case 32:
@@ -18894,6 +18896,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
/* Or if vpermps can be used. */
else if (d->vmode == V16SFmode)
vmode = V16SImode;
+
if (vmode == V64QImode)
{
/* vpshufb only works intra lanes, it is not
@@ -18946,8 +18949,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
machine_mode vpmode = vmode;
- if (vmode == V4QImode
- || vmode == V8QImode)
+ nelt = GET_MODE_SIZE (vmode);
+
+ /* Emulate narrow modes with V16QI instructions. */
+ if (nelt < 16)
{
rtx m128 = GEN_INT (-128);
@@ -18955,19 +18960,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
account for inactive top elements from the first operand. */
if (!d->one_operand_p)
{
- int sz = GET_MODE_SIZE (vmode);
-
for (i = 0; i < nelt; ++i)
{
- int ival = INTVAL (rperm[i]);
- if (ival >= sz)
- ival += 16-sz;
- rperm[i] = GEN_INT (ival);
+ unsigned ival = UINTVAL (rperm[i]);
+ if (ival >= nelt)
+ rperm[i] = GEN_INT (ival + 16 - nelt);
}
}
- /* V4QI/V8QI is emulated with V16QI instruction, fill inactive
- elements in the top positions with zeros. */
+ /* Fill inactive elements in the top positions with zeros. */
for (i = nelt; i < 16; ++i)
rperm[i] = m128;
diff --git a/gcc/testsuite/gcc.target/i386/pr103905.c b/gcc/testsuite/gcc.target/i386/pr103905.c
new file mode 100644
index 0000000..aef9c4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103905.c
@@ -0,0 +1,25 @@
+/* PR target/103905 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O3 -mxop" } */
+
+#include "xop-check.h"
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+ for (int i = 0; i < n; ++i)
+ perm[i] = i;
+}
+
+static void
+xop_test (void)
+{
+ foo (8);
+
+ if (perm[7] != 7)
+ __builtin_abort ();
+}