diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2019-11-16 10:36:20 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2019-11-16 10:36:20 +0000 |
commit | f884cd2fea62eebe71b422e1c97e550958dd42ae (patch) | |
tree | ef31abcde95dea1776d82c4551525b79d39e9687 /gcc | |
parent | 9b75f56d4b7951c60a656396dddd4a65787b95bc (diff) | |
download | gcc-f884cd2fea62eebe71b422e1c97e550958dd42ae.zip gcc-f884cd2fea62eebe71b422e1c97e550958dd42ae.tar.gz gcc-f884cd2fea62eebe71b422e1c97e550958dd42ae.tar.bz2 |
Extend can_duplicate_and_interleave_p to mixed-size vectors

This patch makes can_duplicate_and_interleave_p cope with mixtures of
vector sizes, by using queries based on get_vectype_for_scalar_type
instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).

int_mode_for_size is now the first check we do for a candidate mode,
so it seemed better to restrict it to MAX_FIXED_MODE_SIZE. This avoids
unnecessary work and avoids trying to create scalar types that the
target might not support.
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
element type rather than an element mode.
* tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
Use get_vectype_for_scalar_type to query the natural types
for a given element type rather than basing everything on
GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size
query to MAX_FIXED_MODE_SIZE.
(duplicate_and_interleave): Update call accordingly.
* tree-vect-loop.c (vectorizable_reduction): Likewise.
From-SVN: r278335
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 3 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 33 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 3 |
4 files changed, 36 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a6333b1..7d48dbc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,17 @@ 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> + * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an + element type rather than an element mode. + * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise. + Use get_vectype_for_scalar_type to query the natural types + for a given element type rather than basing everything on + GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size + query to MAX_FIXED_MODE_SIZE. + (duplicate_and_interleave): Update call accordingly. + * tree-vect-loop.c (vectorizable_reduction): Likewise. + +2019-11-16 Richard Sandiford <richard.sandiford@arm.com> + * tree-vectorizer.h (vect_get_vector_types_for_stmt): Take an optional maximum nunits. (get_vectype_for_scalar_type): Likewise. Also declare a form that diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index e6ba91c..37290fa 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6365,10 +6365,9 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, that value needs to be repeated for every instance of the statement within the initial vector. 
*/ unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); - scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out)); if (!neutral_op && !can_duplicate_and_interleave_p (loop_vinfo, group_size, - elt_mode)) + TREE_TYPE (vectype_out))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index e5a7078..50f317f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -264,7 +264,7 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info, return -1; } -/* Check whether it is possible to load COUNT elements of type ELT_MODE +/* Check whether it is possible to load COUNT elements of type ELT_TYPE using the method implemented by duplicate_and_interleave. Return true if so, returning the number of intermediate vectors in *NVECTORS_OUT (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT @@ -272,26 +272,37 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info, bool can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, - machine_mode elt_mode, - unsigned int *nvectors_out, + tree elt_type, unsigned int *nvectors_out, tree *vector_type_out, tree *permutes) { - poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); - poly_int64 nelts; + tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count); + if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type))) + return false; + + machine_mode base_vector_mode = TYPE_MODE (base_vector_type); + poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode); unsigned int nvectors = 1; for (;;) { scalar_int_mode int_mode; poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; - if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts) - && int_mode_for_size (elt_bits, 0).exists (&int_mode)) + if (int_mode_for_size (elt_bits, 1).exists (&int_mode)) { + /* Get the natural vector type for this SLP group size. 
*/ tree int_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (int_mode), 1); - tree vector_type = build_vector_type (int_type, nelts); - if (VECTOR_MODE_P (TYPE_MODE (vector_type))) + tree vector_type + = get_vectype_for_scalar_type (vinfo, int_type, count); + if (vector_type + && VECTOR_MODE_P (TYPE_MODE (vector_type)) + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), + GET_MODE_SIZE (base_vector_mode))) { + /* Try fusing consecutive sequences of COUNT / NVECTORS elements + together into elements of type INT_TYPE and using the result + to build NVECTORS vectors. */ + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); vec_perm_builder sel1 (nelts, 2, 3); vec_perm_builder sel2 (nelts, 2, 3); poly_int64 half_nelts = exact_div (nelts, 2); @@ -491,7 +502,7 @@ again: && !GET_MODE_SIZE (vinfo->vector_mode).is_constant () && (TREE_CODE (type) == BOOLEAN_TYPE || !can_duplicate_and_interleave_p (vinfo, stmts.length (), - TYPE_MODE (type)))) + type))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3552,7 +3563,7 @@ duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type, unsigned int nvectors = 1; tree new_vector_type; tree permutes[2]; - if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type), + if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type, &nvectors, &new_vector_type, permutes)) gcc_unreachable (); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5f19411..fd88084 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1795,8 +1795,7 @@ extern void vect_get_slp_defs (slp_tree, vec<vec<tree> > *, unsigned n = -1U); extern bool vect_slp_bb (basic_block); extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, - machine_mode, +extern bool can_duplicate_and_interleave_p 
(vec_info *, unsigned int, tree, unsigned int * = NULL, tree * = NULL, tree * = NULL); extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, |