aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2012-03-20 17:27:05 +0100
committer Jakub Jelinek <jakub@gcc.gnu.org> 2012-03-20 17:27:05 +0100
commit e5069d626e754ca7dd2d269dbb57d83e3a0eca38 (patch)
tree bc8243ddf7ed3298ce8108696fe0570a6af4ed42 /gcc
parent 6945a32ec3fb20b5bba0d98a419da26c1c6b62d4 (diff)
download gcc-e5069d626e754ca7dd2d269dbb57d83e3a0eca38.zip
gcc-e5069d626e754ca7dd2d269dbb57d83e3a0eca38.tar.gz
gcc-e5069d626e754ca7dd2d269dbb57d83e3a0eca38.tar.bz2
i386.c (vselect_insn): New variable.
* config/i386/i386.c (vselect_insn): New variable.
(init_vselect_insn): New function.
(expand_vselect, expand_vselect_insn): Add testing_p argument.
Call init_vselect_insn if vselect_insn is NULL. Adjust
PATTERN (vselect_insn), instead of creating a new insn each time,
only emit a copy of it if not testing and recog has been successful.
(expand_vec_perm_pshufb, expand_vec_perm_1,
expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_broadcast_1): Adjust
callers.

From-SVN: r185578
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 10
-rw-r--r-- gcc/config/i386/i386.c 102
2 files changed, 86 insertions, 26 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bc9a5b8..f54b320 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
2012-03-20 Jakub Jelinek <jakub@redhat.com>
+ * config/i386/i386.c (vselect_insn): New variable.
+ (init_vselect_insn): New function.
+ (expand_vselect, expand_vselect_insn): Add testing_p argument.
+ Call init_vselect_insn if vselect_insn is NULL. Adjust
+ PATTERN (vselect_insn), instead of creating a new insn each time,
+ only emit a copy of it if not testing and recog has been successful.
+ (expand_vec_perm_pshufb, expand_vec_perm_1,
+ expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_broadcast_1): Adjust
+ callers.
+
PR target/52607
* config/i386/i386.md ("isa" attribute): Add avx2 and noavx2.
("enabled" attribute): Handle avx2 and noavx2 isas.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 30ed616..99c9d3d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -35535,43 +35535,88 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
+/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
+ insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
+ insn every time. */
+
+static GTY(()) rtx vselect_insn;
+
+/* Initialize vselect_insn. */
+
+static void
+init_vselect_insn (void)
+{
+ unsigned i;
+ rtx x;
+
+ x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
+ for (i = 0; i < MAX_VECT_LEN; ++i)
+ XVECEXP (x, 0, i) = const0_rtx;
+ x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
+ const0_rtx), x);
+ x = gen_rtx_SET (VOIDmode, const0_rtx, x);
+ start_sequence ();
+ vselect_insn = emit_insn (x);
+ end_sequence ();
+}
+
/* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA. */
static bool
-expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
+expand_vselect (rtx target, rtx op0, const unsigned char *perm,
+ unsigned nelt, bool testing_p)
{
- rtx rperm[MAX_VECT_LEN], x;
- unsigned i;
+ unsigned int i;
+ rtx x, save_vconcat;
+ int icode;
+
+ if (vselect_insn == NULL_RTX)
+ init_vselect_insn ();
+ x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
+ PUT_NUM_ELEM (XVEC (x, 0), nelt);
for (i = 0; i < nelt; ++i)
- rperm[i] = GEN_INT (perm[i]);
+ XVECEXP (x, 0, i) = GEN_INT (perm[i]);
+ save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
+ XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
+ PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
+ SET_DEST (PATTERN (vselect_insn)) = target;
+ icode = recog_memoized (vselect_insn);
- x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
- x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
- x = gen_rtx_SET (VOIDmode, target, x);
+ if (icode >= 0 && !testing_p)
+ emit_insn (copy_rtx (PATTERN (vselect_insn)));
- x = emit_insn (x);
- if (recog_memoized (x) < 0)
- {
- remove_insn (x);
- return false;
- }
- return true;
+ SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
+ XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
+ INSN_CODE (vselect_insn) = -1;
+
+ return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well. */
static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
- const unsigned char *perm, unsigned nelt)
+ const unsigned char *perm, unsigned nelt,
+ bool testing_p)
{
enum machine_mode v2mode;
rtx x;
+ bool ok;
+
+ if (vselect_insn == NULL_RTX)
+ init_vselect_insn ();
v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
- x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
- return expand_vselect (target, x, perm, nelt);
+ x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
+ PUT_MODE (x, v2mode);
+ XEXP (x, 0) = op0;
+ XEXP (x, 1) = op1;
+ ok = expand_vselect (target, x, perm, nelt, testing_p);
+ XEXP (x, 0) = const0_rtx;
+ XEXP (x, 1) = const0_rtx;
+ return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
@@ -35903,7 +35948,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return true;
return expand_vselect (gen_lowpart (V4DImode, d->target),
gen_lowpart (V4DImode, d->op0),
- perm, 4);
+ perm, 4, false);
}
/* Next see if vpermd can be used. */
@@ -36051,7 +36096,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
}
}
- if (expand_vselect (d->target, d->op0, perm2, nelt))
+ if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
return true;
/* There are plenty of patterns in sse.md that are written for
@@ -36065,7 +36110,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
perm2[i] = d->perm[i] & mask;
perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
}
- if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
+ d->testing_p))
return true;
/* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
@@ -36079,13 +36125,15 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
}
- if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
+ d->testing_p))
return true;
}
}
/* Finally, try the fully general two operand permute. */
- if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
+ if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
+ d->testing_p))
return true;
/* Recognize interleave style patterns with reversed operands. */
@@ -36101,7 +36149,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
perm2[i] = e;
}
- if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
+ if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
+ d->testing_p))
return true;
}
@@ -36149,14 +36198,14 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
memcpy (perm2, d->perm, 4);
for (i = 4; i < 8; ++i)
perm2[i] = i;
- ok = expand_vselect (d->target, d->op0, perm2, 8);
+ ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
gcc_assert (ok);
/* Emit the pshufhw. */
memcpy (perm2 + 4, d->perm + 4, 4);
for (i = 0; i < 4; ++i)
perm2[i] = i;
- ok = expand_vselect (d->target, d->target, perm2, 8);
+ ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
gcc_assert (ok);
return true;
@@ -37190,7 +37239,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
while (vmode != V4SImode);
memset (perm2, elt, 4);
- ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
+ ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
+ d->testing_p);
gcc_assert (ok);
return true;