diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-02 18:26:06 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-02 18:26:06 +0000 |
commit | 7ac7e2868d450dfb9080166ddc4abcc21b86fab3 (patch) | |
tree | 944b586115ea01dff6dac70820714852c4cf2029 /gcc/optabs-query.c | |
parent | 4aae3cb3559802faee3b5cb58d9315dcc5000bc8 (diff) | |
download | gcc-7ac7e2868d450dfb9080166ddc4abcc21b86fab3.zip gcc-7ac7e2868d450dfb9080166ddc4abcc21b86fab3.tar.gz gcc-7ac7e2868d450dfb9080166ddc4abcc21b86fab3.tar.bz2 |
Split can_vec_perm_p into can_vec_perm_{var,const}_p
This patch splits can_vec_perm_p into two functions: can_vec_perm_var_p
for testing permute operations with variable selection vectors, and
can_vec_perm_const_p for testing permute operations with specific
constant selection vectors. This means that we can pass the constant
selection vector by reference.
Constant permutes can still use a variable permute as a fallback.
A later patch adds a check to make sure that we don't truncate the
vector indices when doing this.
However, have_whole_vector_shift checked:
if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
return false;
which had the effect of disallowing the fallback to variable permutes.
I'm not sure whether that was the intention or whether it was just
supposed to short-cut the loop on targets that don't support permutes.
(But then why bother? The first check in the loop would fail and
we'd bail out straightaway.)
The patch adds a parameter for disallowing the fallback. I think it
makes sense to do this for the following code in the VEC_PERM_EXPR
folder:
/* Some targets are deficient and fail to expand a single
argument permutation while still allowing an equivalent
2-argument version. */
if (need_mask_canon && arg2 == op2
&& !can_vec_perm_p (TYPE_MODE (type), false, &sel)
&& can_vec_perm_p (TYPE_MODE (type), false, &sel2))
since it's really testing whether the expand_vec_perm_const code expects
a particular form.
2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* optabs-query.h (can_vec_perm_p): Delete.
(can_vec_perm_var_p, can_vec_perm_const_p): Declare.
* optabs-query.c (can_vec_perm_p): Split into...
(can_vec_perm_var_p, can_vec_perm_const_p): ...these two functions.
(can_mult_highpart_p): Use can_vec_perm_const_p to test whether a
particular selector is valid.
* tree-ssa-forwprop.c (simplify_vector_constructor): Likewise.
* tree-vect-data-refs.c (vect_grouped_store_supported): Likewise.
(vect_grouped_load_supported): Likewise.
(vect_shift_permute_load_chain): Likewise.
* tree-vect-slp.c (vect_build_slp_tree_1): Likewise.
(vect_transform_slp_perm_load): Likewise.
* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
(vectorizable_bswap): Likewise.
(vect_gen_perm_mask_checked): Likewise.
* fold-const.c (fold_ternary_loc): Likewise. Don't take
implementations of variable permutation vectors into account
when deciding which selector to use.
* tree-vect-loop.c (have_whole_vector_shift): Don't check whether
vec_perm_const_optab is supported; instead use can_vec_perm_const_p
with a false third argument.
* tree-vect-generic.c (lower_vec_perm): Use can_vec_perm_const_p
to test whether the constant selector is valid and can_vec_perm_var_p
to test whether a variable selector is valid.
From-SVN: r256091
Diffstat (limited to 'gcc/optabs-query.c')
-rw-r--r-- | gcc/optabs-query.c | 90 |
1 files changed, 59 insertions, 31 deletions
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index b8e7e31..9092590 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -361,58 +361,86 @@ qimode_for_vec_perm (machine_mode mode) return opt_machine_mode (); } -/* Return true if VEC_PERM_EXPR of arbitrary input vectors can be - expanded using SIMD extensions of the CPU. SEL may be NULL, which - stands for an unknown constant. Note that additional permutations - representing whole-vector shifts may also be handled via the vec_shr - optab, but only where the second input vector is entirely constant - zeroes; this case is not dealt with here. */ +/* Return true if VEC_PERM_EXPRs with variable selector operands can be + expanded using SIMD extensions of the CPU. MODE is the mode of the + vectors being permuted. */ bool -can_vec_perm_p (machine_mode mode, bool variable, const vec_perm_indices *sel) +can_vec_perm_var_p (machine_mode mode) { - machine_mode qimode; - /* If the target doesn't implement a vector mode for the vector type, then no operations are supported. */ if (!VECTOR_MODE_P (mode)) return false; - if (!variable) - { - if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing - && (sel == NULL - || targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, *sel))) - return true; - } - if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing) return true; /* We allow fallback to a QI vector mode, and adjust the mask. */ + machine_mode qimode; if (!qimode_for_vec_perm (mode).exists (&qimode)) return false; - /* ??? For completeness, we ought to check the QImode version of - vec_perm_const_optab. But all users of this implicit lowering - feature implement the variable vec_perm_optab. */ if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing) return false; /* In order to support the lowering of variable permutations, we need to support shifts and adds. 
*/ - if (variable) + if (GET_MODE_UNIT_SIZE (mode) > 2 + && optab_handler (ashl_optab, mode) == CODE_FOR_nothing + && optab_handler (vashl_optab, mode) == CODE_FOR_nothing) + return false; + if (optab_handler (add_optab, qimode) == CODE_FOR_nothing) + return false; + + return true; +} + +/* Return true if the target directly supports VEC_PERM_EXPRs on vectors + of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it + is acceptable to force the selector into a register and use a variable + permute (if the target supports that). + + Note that additional permutations representing whole-vector shifts may + also be handled via the vec_shr optab, but only where the second input + vector is entirely constant zeroes; this case is not dealt with here. */ + +bool +can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, + bool allow_variable_p) +{ + /* If the target doesn't implement a vector mode for the vector type, + then no operations are supported. */ + if (!VECTOR_MODE_P (mode)) + return false; + + /* It's probably cheaper to test for the variable case first. */ + if (allow_variable_p) + { + if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing) + return true; + + /* Unlike can_vec_perm_var_p, we don't need to test for optabs + related computing the QImode selector, since that happens at + compile time. */ + machine_mode qimode; + if (qimode_for_vec_perm (mode).exists (&qimode) + && direct_optab_handler (vec_perm_optab, qimode) != CODE_FOR_nothing) + return true; + } + + if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing) { - if (GET_MODE_UNIT_SIZE (mode) > 2 - && optab_handler (ashl_optab, mode) == CODE_FOR_nothing - && optab_handler (vashl_optab, mode) == CODE_FOR_nothing) - return false; - if (optab_handler (add_optab, qimode) == CODE_FOR_nothing) - return false; + if (targetm.vectorize.vec_perm_const_ok == NULL + || targetm.vectorize.vec_perm_const_ok (mode, sel)) + return true; + + /* ??? 
For completeness, we ought to check the QImode version of + vec_perm_const_optab. But all users of this implicit lowering + feature implement the variable vec_perm_optab. */ } - return true; + return false; } /* Find a widening optab even if it doesn't widen as much as we want. @@ -472,7 +500,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) sel.quick_push (!BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0)); - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) return 2; } } @@ -486,7 +514,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) return 3; } } |