aboutsummaryrefslogtreecommitdiff
path: root/gcc/optabs-query.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-01-02 18:26:06 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-01-02 18:26:06 +0000
commit7ac7e2868d450dfb9080166ddc4abcc21b86fab3 (patch)
tree944b586115ea01dff6dac70820714852c4cf2029 /gcc/optabs-query.c
parent4aae3cb3559802faee3b5cb58d9315dcc5000bc8 (diff)
downloadgcc-7ac7e2868d450dfb9080166ddc4abcc21b86fab3.zip
gcc-7ac7e2868d450dfb9080166ddc4abcc21b86fab3.tar.gz
gcc-7ac7e2868d450dfb9080166ddc4abcc21b86fab3.tar.bz2
Split can_vec_perm_p into can_vec_perm_{var,const}_p
This patch splits can_vec_perm_p into two functions: can_vec_perm_var_p for testing permute operations with variable selection vectors, and can_vec_perm_const_p for testing permute operations with specific constant selection vectors. This means that we can pass the constant selection vector by reference. Constant permutes can still use a variable permute as a fallback. A later patch adds a check to make sure that we don't truncate the vector indices when doing this. However, have_whole_vector_shift checked: if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing) return false; which had the effect of disallowing the fallback to variable permutes. I'm not sure whether that was the intention or whether it was just supposed to short-cut the loop on targets that don't support permutes. (But then why bother? The first check in the loop would fail and we'd bail out straightaway.) The patch adds a parameter for disallowing the fallback. I think it makes sense to do this for the following code in the VEC_PERM_EXPR folder: /* Some targets are deficient and fail to expand a single argument permutation while still allowing an equivalent 2-argument version. */ if (need_mask_canon && arg2 == op2 && !can_vec_perm_p (TYPE_MODE (type), false, &sel) && can_vec_perm_p (TYPE_MODE (type), false, &sel2)) since it's really testing whether the expand_vec_perm_const code expects a particular form. 2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * optabs-query.h (can_vec_perm_p): Delete. (can_vec_perm_var_p, can_vec_perm_const_p): Declare. * optabs-query.c (can_vec_perm_p): Split into... (can_vec_perm_var_p, can_vec_perm_const_p): ...these two functions. (can_mult_highpart_p): Use can_vec_perm_const_p to test whether a particular selector is valid. * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. (vect_grouped_load_supported): Likewise. 
(vect_shift_permute_load_chain): Likewise. * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. (vect_transform_slp_perm_load): Likewise. * tree-vect-stmts.c (perm_mask_for_reverse): Likewise. (vectorizable_bswap): Likewise. (vect_gen_perm_mask_checked): Likewise. * fold-const.c (fold_ternary_loc): Likewise. Don't take implementations of variable permutation vectors into account when deciding which selector to use. * tree-vect-loop.c (have_whole_vector_shift): Don't check whether vec_perm_const_optab is supported; instead use can_vec_perm_const_p with a false third argument. * tree-vect-generic.c (lower_vec_perm): Use can_vec_perm_const_p to test whether the constant selector is valid and can_vec_perm_var_p to test whether a variable selector is valid. From-SVN: r256091
Diffstat (limited to 'gcc/optabs-query.c')
-rw-r--r--gcc/optabs-query.c90
1 files changed, 59 insertions, 31 deletions
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index b8e7e31..9092590 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -361,58 +361,86 @@ qimode_for_vec_perm (machine_mode mode)
return opt_machine_mode ();
}
-/* Return true if VEC_PERM_EXPR of arbitrary input vectors can be
- expanded using SIMD extensions of the CPU. SEL may be NULL, which
- stands for an unknown constant. Note that additional permutations
- representing whole-vector shifts may also be handled via the vec_shr
- optab, but only where the second input vector is entirely constant
- zeroes; this case is not dealt with here. */
+/* Return true if VEC_PERM_EXPRs with variable selector operands can be
+ expanded using SIMD extensions of the CPU. MODE is the mode of the
+ vectors being permuted. */
bool
-can_vec_perm_p (machine_mode mode, bool variable, const vec_perm_indices *sel)
+can_vec_perm_var_p (machine_mode mode)
{
- machine_mode qimode;
-
/* If the target doesn't implement a vector mode for the vector type,
then no operations are supported. */
if (!VECTOR_MODE_P (mode))
return false;
- if (!variable)
- {
- if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
- && (sel == NULL
- || targetm.vectorize.vec_perm_const_ok == NULL
- || targetm.vectorize.vec_perm_const_ok (mode, *sel)))
- return true;
- }
-
if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing)
return true;
/* We allow fallback to a QI vector mode, and adjust the mask. */
+ machine_mode qimode;
if (!qimode_for_vec_perm (mode).exists (&qimode))
return false;
- /* ??? For completeness, we ought to check the QImode version of
- vec_perm_const_optab. But all users of this implicit lowering
- feature implement the variable vec_perm_optab. */
if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing)
return false;
/* In order to support the lowering of variable permutations,
we need to support shifts and adds. */
- if (variable)
+ if (GET_MODE_UNIT_SIZE (mode) > 2
+ && optab_handler (ashl_optab, mode) == CODE_FOR_nothing
+ && optab_handler (vashl_optab, mode) == CODE_FOR_nothing)
+ return false;
+ if (optab_handler (add_optab, qimode) == CODE_FOR_nothing)
+ return false;
+
+ return true;
+}
+
+/* Return true if the target directly supports VEC_PERM_EXPRs on vectors
+ of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it
+ is acceptable to force the selector into a register and use a variable
+ permute (if the target supports that).
+
+ Note that additional permutations representing whole-vector shifts may
+ also be handled via the vec_shr optab, but only where the second input
+ vector is entirely constant zeroes; this case is not dealt with here. */
+
+bool
+can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
+ bool allow_variable_p)
+{
+ /* If the target doesn't implement a vector mode for the vector type,
+ then no operations are supported. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ /* It's probably cheaper to test for the variable case first. */
+ if (allow_variable_p)
+ {
+ if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing)
+ return true;
+
+ /* Unlike can_vec_perm_var_p, we don't need to test for optabs
+ related to computing the QImode selector, since that happens at
+ compile time. */
+ machine_mode qimode;
+ if (qimode_for_vec_perm (mode).exists (&qimode)
+ && direct_optab_handler (vec_perm_optab, qimode) != CODE_FOR_nothing)
+ return true;
+ }
+
+ if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing)
{
- if (GET_MODE_UNIT_SIZE (mode) > 2
- && optab_handler (ashl_optab, mode) == CODE_FOR_nothing
- && optab_handler (vashl_optab, mode) == CODE_FOR_nothing)
- return false;
- if (optab_handler (add_optab, qimode) == CODE_FOR_nothing)
- return false;
+ if (targetm.vectorize.vec_perm_const_ok == NULL
+ || targetm.vectorize.vec_perm_const_ok (mode, sel))
+ return true;
+
+ /* ??? For completeness, we ought to check the QImode version of
+ vec_perm_const_optab. But all users of this implicit lowering
+ feature implement the variable vec_perm_optab. */
}
- return true;
+ return false;
}
/* Find a widening optab even if it doesn't widen as much as we want.
@@ -472,7 +500,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
sel.quick_push (!BYTES_BIG_ENDIAN
+ (i & ~1)
+ ((i & 1) ? nunits : 0));
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
return 2;
}
}
@@ -486,7 +514,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
auto_vec_perm_indices sel (nunits);
for (i = 0; i < nunits; ++i)
sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
- if (can_vec_perm_p (mode, false, &sel))
+ if (can_vec_perm_const_p (mode, sel))
return 3;
}
}