author		Ilya Tocar <ilya.tocar@intel.com>	2014-12-08 11:45:35 +0000
committer	Ilya Tocar <tocarip@gcc.gnu.org>	2014-12-08 14:45:35 +0300
commit		9f9f61155a4836aa57e19405a95852172670b8ef (patch)
tree		727614011060b194e8be795773a0cd86576d9c4a /gcc
parent		d3fb44cbf6834f43cd359b531bf5df8aad185f39 (diff)
Enable const permutations for V64QImode.
gcc/
	* config/i386/i386.c (expand_vec_perm_broadcast_1): Handle v64qi.
	(expand_vec_perm_vpermi2_vpshub2): New.
	(ix86_expand_vec_perm_const_1): Use it.
	(ix86_vectorize_vec_perm_const_ok): Handle v64qi.
	* config/i386/sse.md (VEC_PERM_CONST): Add v64qi.

From-SVN: r218482
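The strategy behind the new expander can be sketched with AVX-512BW intrinsics. This is an illustrative rewrite, not code from the patch: the helper name permute_bytes_2x64 and the sel[] argument (byte indices 0..127 into the concatenation of the two operands) are assumptions made for the example. Even-numbered destination bytes are produced by one vpermi2w word permutation followed by a vpshufb that keeps only the wanted byte of each word and zeroes the rest; odd-numbered destination bytes get a second vpermi2w/vpshufb pair; a final vpor merges the two halves, mirroring the ds[]/rperm[] setup in expand_vec_perm_vpermi2_vpshub2.

/* Illustrative sketch only (not part of the patch): the same
   2 x vpermi2w + 2 x vpshufb + vpor decomposition, written with
   AVX-512BW intrinsics.  sel[i] in 0..127 picks one of the 128 input
   bytes, 0..63 from a and 64..127 from b.  */
#include <immintrin.h>
#include <stdint.h>

static __m512i
permute_bytes_2x64 (__m512i a, __m512i b, const uint8_t sel[64])
{
  uint16_t wsel_even[32], wsel_odd[32];
  int8_t bsel_even[64], bsel_odd[64];

  for (int i = 0; i < 64; i++)
    {
      /* 16-bit word index (0..63 across both operands) holding byte sel[i];
	 even destination bytes are routed through the first word permute,
	 odd ones through the second.  */
      if (i & 1)
	wsel_odd[i / 2] = sel[i] / 2;
      else
	wsel_even[i / 2] = sel[i] / 2;

      /* vpshufb selector: lane-relative index of the low or high byte of
	 the word that the word permutation parked at position i; -1 (bit 7
	 set) zeroes the positions owned by the other half.  */
      int8_t pick = (i & 14) + (sel[i] & 1);
      bsel_even[i] = (i & 1) ? -1 : pick;
      bsel_odd[i] = (i & 1) ? pick : -1;
    }

  __m512i even_words
    = _mm512_permutex2var_epi16 (a, _mm512_loadu_si512 (wsel_even), b);
  __m512i odd_words
    = _mm512_permutex2var_epi16 (a, _mm512_loadu_si512 (wsel_odd), b);

  __m512i even_bytes
    = _mm512_shuffle_epi8 (even_words, _mm512_loadu_si512 (bsel_even));
  __m512i odd_bytes
    = _mm512_shuffle_epi8 (odd_words, _mm512_loadu_si512 (bsel_odd));

  return _mm512_or_si512 (even_bytes, odd_bytes);
}

Build with -mavx512bw; the selector arithmetic ((i & 14) + (sel[i] & 1) to pick a byte, -1 to zero one) is the same as in the patch below.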
Diffstat (limited to 'gcc')
-rw-r--r--	gcc/ChangeLog	8
-rw-r--r--	gcc/config/i386/i386.c	81
-rw-r--r--	gcc/config/i386/sse.md	2
3 files changed, 90 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6f9096b..15f5f26 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2014-12-08 Ilya Tocar <ilya.tocar@intel.com>
+
+ * config/i386/i386.c (expand_vec_perm_broadcast_1): Handle v64qi.
+ (expand_vec_perm_vpermi2_vpshub2): New.
+ (ix86_expand_vec_perm_const_1): Use it.
+ (ix86_vectorize_vec_perm_const_ok): Handle v64qi.
+ * config/i386/sse.md (VEC_PERM_CONST): Add v64qi.
+
2014-12-08 Ilya Enkovich <ilya.enkovich@intel.com>
* tree-chkp.c (chkp_build_returned_bound): Don't predict
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index aaf0b38..88dd9f4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -48882,6 +48882,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
return true;
+ case V64QImode:
case V32QImode:
case V16HImode:
case V8SImode:
@@ -48915,6 +48916,78 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
return expand_vec_perm_broadcast_1 (d);
}
+/* Implement arbitrary permutations of two V64QImode operands
+ with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
+static bool
+expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
+{
+ if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ struct expand_vec_perm_d ds[2];
+ rtx rperm[128], vperm, target0, target1;
+ unsigned int i, nelt;
+ machine_mode vmode;
+
+ nelt = d->nelt;
+ vmode = V64QImode;
+
+ for (i = 0; i < 2; i++)
+ {
+ ds[i] = *d;
+ ds[i].vmode = V32HImode;
+ ds[i].nelt = 32;
+ ds[i].target = gen_reg_rtx (V32HImode);
+ ds[i].op0 = gen_lowpart (V32HImode, d->op0);
+ ds[i].op1 = gen_lowpart (V32HImode, d->op1);
+ }
+
+ /* Prepare permutations such that the first one takes care of
+ putting the even bytes into the right positions or one higher
+ positions (ds[0]) and the second one takes care of
+ putting the odd bytes into the right positions or one below
+ (ds[1]). */
+
+ for (i = 0; i < nelt; i++)
+ {
+ ds[i & 1].perm[i / 2] = d->perm[i] / 2;
+ if (i & 1)
+ {
+ rperm[i] = constm1_rtx;
+ rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
+ }
+ else
+ {
+ rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
+ rperm[i + 64] = constm1_rtx;
+ }
+ }
+
+ bool ok = expand_vec_perm_1 (&ds[0]);
+ gcc_assert (ok);
+ ds[0].target = gen_lowpart (V64QImode, ds[0].target);
+
+ ok = expand_vec_perm_1 (&ds[1]);
+ gcc_assert (ok);
+ ds[1].target = gen_lowpart (V64QImode, ds[1].target);
+
+ vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
+ vperm = force_reg (vmode, vperm);
+ target0 = gen_reg_rtx (V64QImode);
+ emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
+
+ vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
+ vperm = force_reg (vmode, vperm);
+ target1 = gen_reg_rtx (V64QImode);
+ emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
+
+ emit_insn (gen_iorv64qi3 (d->target, target0, target1));
+ return true;
+}
+
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
all the shorter instruction sequences. */
@@ -49089,6 +49162,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
return true;
+ if (expand_vec_perm_vpermi2_vpshub2 (d))
+ return true;
+
/* ??? Look for narrow permutations whose element orderings would
allow the promotion to a wider mode. */
@@ -49233,6 +49309,11 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
/* All implementable with a single vpermi2 insn. */
return true;
break;
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
+ return true;
+ break;
case V8SImode:
case V8SFmode:
case V4DFmode:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c3aaea3..734e6b4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10713,7 +10713,7 @@
(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
(V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
- (V32HI "TARGET_AVX512BW")])
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
(define_expand "vec_perm_const<mode>"
[(match_operand:VEC_PERM_CONST 0 "register_operand")
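For reference, a hypothetical user-level example (not part of this commit): with -O2 -mavx512bw, a constant two-operand byte shuffle on 64-byte vectors is now reported as implementable by ix86_vectorize_vec_perm_const_ok and expanded as a constant permutation. A shuffle as regular as this interleave may well be matched by an earlier, cheaper expander; the new vpermi2w/vpshufb sequence is the fallback for arbitrary byte orders.

/* Hypothetical example, assuming -O2 -mavx512bw: a constant shuffle of
   two 64-byte vectors using GCC's generic __builtin_shuffle.  */
typedef unsigned char v64qi __attribute__ ((vector_size (64)));

v64qi
interleave_low (v64qi x, v64qi y)
{
  /* Mask values 0..63 select bytes of x, 64..127 select bytes of y.  */
  v64qi m = {  0, 64,  1, 65,  2, 66,  3, 67,  4, 68,  5, 69,  6, 70,  7, 71,
               8, 72,  9, 73, 10, 74, 11, 75, 12, 76, 13, 77, 14, 78, 15, 79,
              16, 80, 17, 81, 18, 82, 19, 83, 20, 84, 21, 85, 22, 86, 23, 87,
              24, 88, 25, 89, 26, 90, 27, 91, 28, 92, 29, 93, 30, 94, 31, 95 };
  return __builtin_shuffle (x, y, m);
}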