aboutsummaryrefslogtreecommitdiff
path: root/gcc/optabs.c
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@linaro.org> 2018-01-02 18:26:47 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-02 18:26:47 +0000
commit e3342de49cbee48957acc749b9566eee230860be (patch)
tree 32a86a752b83bafed11e1621d738a7fd284a93f7 /gcc/optabs.c
parent 6da64f1b329f57c07f22ec034bc7bc4b0dc9e87b (diff)
download gcc-e3342de49cbee48957acc749b9566eee230860be.zip
gcc-e3342de49cbee48957acc749b9566eee230860be.tar.gz
gcc-e3342de49cbee48957acc749b9566eee230860be.tar.bz2
Make vec_perm_indices use new vector encoding
This patch changes vec_perm_indices from a plain vec<> to a class that stores a canonicalized permutation, using the same encoding as for VECTOR_CSTs. This means that vec_perm_indices now carries information about the number of vectors being permuted (currently always 1 or 2) and the number of elements in each input vector.

A new vec_perm_builder class is used to actually build up the vector, like tree_vector_builder does for trees. vec_perm_indices is the completed representation, a bit like VECTOR_CST is for trees.

The patch just does a mechanical conversion of the code to vec_perm_builder: a later patch uses explicit encodings where possible.

The point of all this is that it makes the representation suitable for variable-length vectors. It's no longer necessary for the underlying vec<>s to store every element explicitly.

In int-vector-builder.h, "using the same encoding as tree and rtx constants" describes the endpoint -- adding the rtx encoding comes later.

2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>

gcc/
* int-vector-builder.h: New file.
* vec-perm-indices.h: Include int-vector-builder.h.
(vec_perm_indices): Redefine as an int_vector_builder.
(auto_vec_perm_indices): Delete.
(vec_perm_builder): Redefine as a stand-alone class.
(vec_perm_indices::vec_perm_indices): New function.
(vec_perm_indices::clamp): Likewise.
* vec-perm-indices.c: Include fold-const.h and tree-vector-builder.h.
(vec_perm_indices::new_vector): New function.
(vec_perm_indices::new_expanded_vector): Update for new vec_perm_indices class.
(vec_perm_indices::rotate_inputs): New function.
(vec_perm_indices::all_in_range_p): Operate directly on the encoded form, without computing elided elements.
(tree_to_vec_perm_builder): Operate directly on the VECTOR_CST encoding. Update for new vec_perm_indices class.
* optabs.c (expand_vec_perm_const): Create a vec_perm_indices for the given vec_perm_builder.
(expand_vec_perm_var): Update vec_perm_builder constructor.
(expand_mult_highpart): Use vec_perm_builder instead of auto_vec_perm_indices.
* optabs-query.c (can_mult_highpart_p): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. Use a single or double series encoding as appropriate.
* fold-const.c (fold_ternary_loc): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices.
* tree-ssa-forwprop.c (simplify_vector_constructor): Likewise.
* tree-vect-data-refs.c (vect_grouped_store_supported): Likewise.
(vect_permute_store_chain): Likewise.
(vect_grouped_load_supported): Likewise.
(vect_permute_load_chain): Likewise.
(vect_shift_permute_load_chain): Likewise.
* tree-vect-slp.c (vect_build_slp_tree_1): Likewise.
(vect_transform_slp_perm_load): Likewise.
(vect_schedule_slp_instance): Likewise.
* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
(vectorizable_mask_load_store): Likewise.
(vectorizable_bswap): Likewise.
(vectorizable_store): Likewise.
(vectorizable_load): Likewise.
* tree-vect-generic.c (lower_vec_perm): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. Use tree_to_vec_perm_builder to read the vector from a tree.
* tree-vect-loop.c (calc_vec_perm_mask_for_shift): Take a vec_perm_builder instead of a vec_perm_indices.
(have_whole_vector_shift): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. Leave the truncation to calc_vec_perm_mask_for_shift.
(vect_create_epilog_for_reduction): Likewise.
* config/aarch64/aarch64.c (expand_vec_perm_d::perm): Change from auto_vec_perm_indices to vec_perm_indices.
(aarch64_expand_vec_perm_const_1): Use rotate_inputs on d.perm instead of changing individual elements.
(aarch64_vectorize_vec_perm_const): Use new_vector to install the vector in d.perm.
* config/arm/arm.c (expand_vec_perm_d::perm): Change from auto_vec_perm_indices to vec_perm_indices.
(arm_expand_vec_perm_const_1): Use rotate_inputs on d.perm instead of changing individual elements.
(arm_vectorize_vec_perm_const): Use new_vector to install the vector in d.perm.
* config/powerpcspe/powerpcspe.c (rs6000_expand_extract_even): Update vec_perm_builder constructor.
(rs6000_expand_interleave): Likewise.
* config/rs6000/rs6000.c (rs6000_expand_extract_even): Likewise.
(rs6000_expand_interleave): Likewise.

From-SVN: r256095
Diffstat (limited to 'gcc/optabs.c')
-rw-r--r-- gcc/optabs.c 19
1 files changed, 12 insertions, 7 deletions
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 3a41a65..db13a25 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5476,6 +5476,11 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
rtx_insn *last = get_last_insn ();
bool single_arg_p = rtx_equal_p (v0, v1);
+ /* Always specify two input vectors here and leave the target to handle
+ cases in which the inputs are equal. Not all backends can cope with
+ the single-input representation when testing for a double-input
+ target instruction. */
+ vec_perm_indices indices (sel, 2, GET_MODE_NUNITS (mode));
/* See if this can be handled with a vec_shr. We only do this if the
second vector is all zeroes. */
@@ -5488,7 +5493,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
&& (shift_code != CODE_FOR_nothing
|| shift_code_qi != CODE_FOR_nothing))
{
- rtx shift_amt = shift_amt_for_vec_perm_mask (mode, sel);
+ rtx shift_amt = shift_amt_for_vec_perm_mask (mode, indices);
if (shift_amt)
{
struct expand_operand ops[3];
@@ -5520,7 +5525,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
else
v1 = force_reg (mode, v1);
- if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, sel))
+ if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, indices))
return target;
}
@@ -5529,7 +5534,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
rtx target_qi = NULL_RTX, v0_qi = NULL_RTX, v1_qi = NULL_RTX;
if (qimode != VOIDmode)
{
- qimode_indices.new_expanded_vector (sel, GET_MODE_UNIT_SIZE (mode));
+ qimode_indices.new_expanded_vector (indices, GET_MODE_UNIT_SIZE (mode));
target_qi = gen_reg_rtx (qimode);
v0_qi = gen_lowpart (qimode, v0);
v1_qi = gen_lowpart (qimode, v1);
@@ -5556,7 +5561,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
REQUIRED_SEL_MODE is OK. */
if (sel_mode != required_sel_mode)
{
- if (!selector_fits_mode_p (required_sel_mode, sel))
+ if (!selector_fits_mode_p (required_sel_mode, indices))
{
delete_insns_since (last);
return NULL_RTX;
@@ -5567,7 +5572,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
insn_code icode = direct_optab_handler (vec_perm_optab, mode);
if (icode != CODE_FOR_nothing)
{
- rtx sel_rtx = vec_perm_indices_to_rtx (sel_mode, sel);
+ rtx sel_rtx = vec_perm_indices_to_rtx (sel_mode, indices);
rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel_rtx);
if (tmp)
return tmp;
@@ -5642,7 +5647,7 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
gcc_assert (sel != NULL);
/* Broadcast the low byte each element into each of its bytes. */
- vec_perm_builder const_sel (w);
+ vec_perm_builder const_sel (w, w, 1);
for (i = 0; i < w; ++i)
{
int this_e = i / u * u;
@@ -5890,7 +5895,7 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
expand_insn (optab_handler (tab2, mode), 3, eops);
m2 = gen_lowpart (mode, eops[0].value);
- auto_vec_perm_indices sel (nunits);
+ vec_perm_builder sel (nunits, nunits, 1);
if (method == 2)
{
for (i = 0; i < nunits; ++i)