diff options
author | Alan Lawrence <alan.lawrence@arm.com> | 2014-11-13 16:27:37 +0000 |
---|---|---|
committer | Alan Lawrence <alalaw01@gcc.gnu.org> | 2014-11-13 16:27:37 +0000 |
commit | cf7aa6a3b79ac25df266aa4fcfe6c059243602aa (patch) | |
tree | 6780c5d8c1b45da220db82c4a0f897afcd467d9a /gcc/optabs.c | |
parent | 557be5a8767902f204e8afa04551a387eac33a26 (diff) | |
download | gcc-cf7aa6a3b79ac25df266aa4fcfe6c059243602aa.zip gcc-cf7aa6a3b79ac25df266aa4fcfe6c059243602aa.tar.gz gcc-cf7aa6a3b79ac25df266aa4fcfe6c059243602aa.tar.bz2 |
[Vectorizer] Use a VEC_PERM_EXPR instead of VEC_RSHIFT_EXPR; expand appropriate VEC_PERM_EXPRs using vec_shr_optab
* optabs.c (can_vec_perm_p): Update comment, does not consider vec_shr.
(shift_amt_for_vec_perm_mask): New.
(expand_vec_perm_1): Use vec_shr_optab if second vector is const0_rtx
and mask appropriate.
* tree-vect-loop.c (calc_vec_perm_mask_for_shift): New.
(have_whole_vector_shift): New.
(vect_model_reduction_cost): Call have_whole_vector_shift instead of
looking for vec_shr_optab.
(vect_create_epilog_for_reduction): Likewise; also rename local variable
have_whole_vector_shift to reduce_with_shift; output VEC_PERM_EXPRs
instead of VEC_RSHIFT_EXPRs.
* tree-vect-stmts.c (vect_gen_perm_mask_checked): Extend comment.
From-SVN: r217509
Diffstat (limited to 'gcc/optabs.c')
-rw-r--r-- | gcc/optabs.c | 48 |
1 file changed, 46 insertions, 2 deletions
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 3376f2d..4ddd9cc 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6567,8 +6567,11 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
   return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
 }
 
-/* Return true if VEC_PERM_EXPR can be expanded using SIMD extensions
-   of the CPU. SEL may be NULL, which stands for an unknown constant. */
+/* Return true if VEC_PERM_EXPR of arbitrary input vectors can be expanded using
+   SIMD extensions of the CPU. SEL may be NULL, which stands for an unknown
+   constant. Note that additional permutations representing whole-vector shifts
+   may also be handled via the vec_shr optab, but only where the second input
+   vector is entirely constant zeroes; this case is not dealt with here. */
 bool
 can_vec_perm_p (machine_mode mode, bool variable,
@@ -6621,6 +6624,36 @@ can_vec_perm_p (machine_mode mode, bool variable,
   return true;
 }
 
+/* Checks if vec_perm mask SEL is a constant equivalent to a shift of the first
+   vec_perm operand, assuming the second operand is a constant vector of zeroes.
+   Return the shift distance in bits if so, or NULL_RTX if the vec_perm is not a
+   shift. */
+static rtx
+shift_amt_for_vec_perm_mask (rtx sel)
+{
+  unsigned int i, first, nelt = GET_MODE_NUNITS (GET_MODE (sel));
+  unsigned int bitsize = GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (sel)));
+
+  if (GET_CODE (sel) != CONST_VECTOR)
+    return NULL_RTX;
+
+  first = INTVAL (CONST_VECTOR_ELT (sel, 0));
+  if (first >= 2*nelt)
+    return NULL_RTX;
+  for (i = 1; i < nelt; i++)
+    {
+      int idx = INTVAL (CONST_VECTOR_ELT (sel, i));
+      unsigned int expected = (i + first) & (2 * nelt - 1);
+      /* Indices into the second vector are all equivalent. */
+      if (idx < 0 || (MIN (nelt, (unsigned) idx) != MIN (nelt, expected)))
+        return NULL_RTX;
+    }
+
+  if (BYTES_BIG_ENDIAN)
+    first = (2 * nelt) - first;
+  return GEN_INT (first * bitsize);
+}
+
 /* A subroutine of expand_vec_perm for expanding one vec_perm insn. */
 
 static rtx
@@ -6649,6 +6682,17 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
   else
     {
       create_input_operand (&ops[1], v0, tmode);
+      /* See if this can be handled with a vec_shr. We only do this if the
+         second vector is all zeroes. */
+      enum insn_code shift_code = optab_handler (vec_shr_optab, GET_MODE (v0));
+      if (v1 == CONST0_RTX (GET_MODE (v1)) && shift_code)
+        if (rtx shift_amt = shift_amt_for_vec_perm_mask (sel))
+          {
+            create_convert_operand_from_type (&ops[2], shift_amt,
+                                              sizetype_tab[(int) stk_sizetype]);
+            if (maybe_expand_insn (shift_code, 3, ops))
+              return ops[0].value;
+          }
       create_input_operand (&ops[2], v1, tmode);
     }