diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 38 | ||||
-rw-r--r-- | gcc/fold-const.c | 33 | ||||
-rw-r--r-- | gcc/optabs-query.c | 19 | ||||
-rw-r--r-- | gcc/optabs-query.h | 3 | ||||
-rw-r--r-- | gcc/target.h | 8 | ||||
-rw-r--r-- | gcc/tree-ssa-forwprop.c | 11 | ||||
-rw-r--r-- | gcc/tree-vect-data-refs.c | 62 | ||||
-rw-r--r-- | gcc/tree-vect-generic.c | 8 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 24 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 25 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 52 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 4 |
12 files changed, 176 insertions, 111 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b52eced..a6c6fcc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2,6 +2,44 @@ Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> + * target.h (vec_perm_indices): New typedef. + (auto_vec_perm_indices): Likewise. + * optabs-query.h: Include target.h + (can_vec_perm_p): Take a vec_perm_indices *. + * optabs-query.c (can_vec_perm_p): Likewise. + (can_mult_highpart_p): Update accordingly. Use auto_vec_perm_indices. + * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. + * tree-vect-generic.c (lower_vec_perm): Likewise. + * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. + (vect_grouped_load_supported): Likewise. + (vect_shift_permute_load_chain): Likewise. + (vect_permute_store_chain): Use auto_vec_perm_indices. + (vect_permute_load_chain): Likewise. + * fold-const.c (fold_vec_perm): Take vec_perm_indices. + (fold_ternary_loc): Update accordingly. Use auto_vec_perm_indices. + Update uses of can_vec_perm_p. + * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Replace the + mode with a number of elements. Take a vec_perm_indices *. + (vect_create_epilog_for_reduction): Update accordingly. + Use auto_vec_perm_indices. + (have_whole_vector_shift): Likewise. Update call to can_vec_perm_p. + * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. + (vect_transform_slp_perm_load): Likewise. + (vect_schedule_slp_instance): Use auto_vec_perm_indices. + * tree-vectorizer.h (vect_gen_perm_mask_any): Take a vec_perm_indices. + (vect_gen_perm_mask_checked): Likewise. + * tree-vect-stmts.c (vect_gen_perm_mask_any): Take a vec_perm_indices. + (vect_gen_perm_mask_checked): Likewise. + (vectorizable_mask_load_store): Use auto_vec_perm_indices. + (vectorizable_store): Likewise. + (vectorizable_load): Likewise. + (perm_mask_for_reverse): Likewise. Update call to can_vec_perm_p. + (vectorizable_bswap): Likewise. + +2017-09-14 Richard Sandiford <richard.sandiford@linaro.org> + Alan Hayward <alan.hayward@arm.com> + David Sherwood <david.sherwood@arm.com> + * tree.h (build_vector): Take a vec<tree> instead of a tree *. * tree.c (build_vector): Likewise. (build_vector_from_ctor): Update accordingly. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 013081d..fa9d1bb 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -8786,12 +8786,14 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) NULL_TREE otherwise. */ static tree -fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel) +fold_vec_perm (tree type, tree arg0, tree arg1, vec_perm_indices sel) { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int i; bool need_ctor = false; - gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts + unsigned int nelts = sel.length (); + gcc_assert (TYPE_VECTOR_SUBPARTS (type) == nelts + && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts); if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) @@ -11312,15 +11314,15 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, || TREE_CODE (arg2) == CONSTRUCTOR)) { unsigned int nelts = VECTOR_CST_NELTS (arg0), i; - unsigned char *sel = XALLOCAVEC (unsigned char, nelts); gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); + auto_vec_perm_indices sel (nelts); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg0, i); if (integer_all_onesp (val)) - sel[i] = i; + sel.quick_push (i); else if (integer_zerop (val)) - sel[i] = nelts + i; + sel.quick_push (nelts + i); else /* Currently unreachable. */ return NULL_TREE; } @@ -11643,8 +11645,6 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, if (TREE_CODE (arg2) == VECTOR_CST) { unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; - unsigned char *sel = XALLOCAVEC (unsigned char, 2 * nelts); - unsigned char *sel2 = sel + nelts; bool need_mask_canon = false; bool need_mask_canon2 = false; bool all_in_vec0 = true; @@ -11656,6 +11656,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, mask2 = 2 * nelts - 1; mask = single_arg ? (nelts - 1) : mask2; gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); + auto_vec_perm_indices sel (nelts); + auto_vec_perm_indices sel2 (nelts); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg2, i); @@ -11667,16 +11669,19 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, wide_int t = val; need_mask_canon |= wi::gtu_p (t, mask); need_mask_canon2 |= wi::gtu_p (t, mask2); - sel[i] = t.to_uhwi () & mask; - sel2[i] = t.to_uhwi () & mask2; + unsigned int elt = t.to_uhwi () & mask; + unsigned int elt2 = t.to_uhwi () & mask2; - if (sel[i] < nelts) + if (elt < nelts) all_in_vec1 = false; else all_in_vec0 = false; - if ((sel[i] & (nelts-1)) != i) + if ((elt & (nelts - 1)) != i) maybe_identity = false; + + sel.quick_push (elt); + sel2.quick_push (elt2); } if (maybe_identity) @@ -11714,8 +11719,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, argument permutation while still allowing an equivalent 2-argument version. */ if (need_mask_canon && arg2 == op2 - && !can_vec_perm_p (TYPE_MODE (type), false, sel) - && can_vec_perm_p (TYPE_MODE (type), false, sel2)) + && !can_vec_perm_p (TYPE_MODE (type), false, &sel) + && can_vec_perm_p (TYPE_MODE (type), false, &sel2)) { need_mask_canon = need_mask_canon2; sel = sel2; diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 81b1bd9..ced6f57 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -353,8 +353,7 @@ can_conditionally_move_p (machine_mode mode) zeroes; this case is not dealt with here. */ bool -can_vec_perm_p (machine_mode mode, bool variable, - const unsigned char *sel) +can_vec_perm_p (machine_mode mode, bool variable, vec_perm_indices *sel) { machine_mode qimode; @@ -368,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool variable, if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing && (sel == NULL || targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, sel))) + || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0]))) return true; } @@ -460,7 +459,6 @@ int can_mult_highpart_p (machine_mode mode, bool uns_p) { optab op; - unsigned char *sel; unsigned i, nunits; op = uns_p ? umul_highpart_optab : smul_highpart_optab; @@ -472,7 +470,6 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) return 0; nunits = GET_MODE_NUNITS (mode); - sel = XALLOCAVEC (unsigned char, nunits); op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) @@ -480,9 +477,12 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) { + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0); - if (can_vec_perm_p (mode, false, sel)) + sel.quick_push (!BYTES_BIG_ENDIAN + + (i & ~1) + + ((i & 1) ? nunits : 0)); + if (can_vec_perm_p (mode, false, &sel)) return 2; } } @@ -493,9 +493,10 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) { + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); - if (can_vec_perm_p (mode, false, sel)) + sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); + if (can_vec_perm_p (mode, false, &sel)) return 3; } } diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 1612fc8..9c2d574 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #define GCC_OPTABS_QUERY_H #include "insn-opinit.h" +#include "target.h" /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing if the target does not have such an insn. */ @@ -165,7 +166,7 @@ enum insn_code can_extend_p (machine_mode, machine_mode, int); enum insn_code can_float_p (machine_mode, machine_mode, int); enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *); bool can_conditionally_move_p (machine_mode mode); -bool can_vec_perm_p (machine_mode, bool, const unsigned char *); +bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *); enum insn_code widening_optab_handler (optab, machine_mode, machine_mode); /* Find a widening optab even if it doesn't widen as much as we want. */ #define find_widening_optab_handler(A,B,C,D) \ diff --git a/gcc/target.h b/gcc/target.h index 393de40..64e1d68 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -191,6 +191,14 @@ enum vect_cost_model_location { vect_epilogue = 2 }; +/* The type to use for vector permutes with a constant permute vector. + Each entry is an index into the concatenated input vectors. */ +typedef vec<unsigned char> vec_perm_indices; + +/* Same, but can be used to construct local permute vectors that are + automatically freed. */ +typedef auto_vec<unsigned char, 32> auto_vec_perm_indices; + /* The target structure. This holds all the backend hooks. */ #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS; diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 82d940b..11511b4 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -1952,7 +1952,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) unsigned elem_size, nelts, i; enum tree_code code, conv_code; constructor_elt *elt; - unsigned char *sel; bool maybe_ident; gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); @@ -1965,7 +1964,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elem_type = TREE_TYPE (type); elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - sel = XALLOCAVEC (unsigned char, nelts); + auto_vec_perm_indices sel (nelts); orig = NULL; conv_code = ERROR_MARK; maybe_ident = true; @@ -2023,8 +2022,10 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) } if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) return false; - sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; - if (sel[i] != i) maybe_ident = false; + unsigned int elt = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; + if (elt != i) + maybe_ident = false; + sel.quick_push (elt); } if (i < nelts) return false; @@ -2053,7 +2054,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) { tree mask_type; - if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (type), false, &sel)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 070c707..0b3b968 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -4547,7 +4547,8 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (VECTOR_MODE_P (mode)) { unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); if (count == 3) { @@ -4568,7 +4569,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = 0; } - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -4585,7 +4586,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = nelt + j2++; } - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -4605,13 +4606,13 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[i * 2] = i; sel[i * 2 + 1] = i + nelt; } - if (can_vec_perm_p (mode, false, sel)) - { - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - if (can_vec_perm_p (mode, false, sel)) - return true; - } + if (can_vec_perm_p (mode, false, &sel)) + { + for (i = 0; i < nelt; i++) + sel[i] += nelt / 2; + if (can_vec_perm_p (mode, false, &sel)) + return true; + } } } @@ -4710,7 +4711,9 @@ vect_permute_store_chain (vec<tree> dr_chain, tree perm3_mask_low, perm3_mask_high; unsigned int i, n, log_length = exact_log2 (length); unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), @@ -5132,7 +5135,8 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, if (VECTOR_MODE_P (mode)) { unsigned int i, j, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); if (count == 3) { @@ -5144,7 +5148,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, sel[i] = 3 * i + k; else sel[i] = 0; - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5157,7 +5161,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, sel[i] = i; else sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5174,11 +5178,11 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, gcc_assert (pow2p_hwi (count)); for (i = 0; i < nelt; i++) sel[i] = i * 2; - if (can_vec_perm_p (mode, false, sel)) + if (can_vec_perm_p (mode, false, &sel)) { for (i = 0; i < nelt; i++) sel[i] = i * 2 + 1; - if (can_vec_perm_p (mode, false, sel)) + if (can_vec_perm_p (mode, false, &sel)) return true; } } @@ -5292,7 +5296,9 @@ vect_permute_load_chain (vec<tree> dr_chain, tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); unsigned int i, j, log_length = exact_log2 (length); unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), @@ -5486,10 +5492,12 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); unsigned int i; unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); + result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), length * sizeof (tree)); @@ -5501,7 +5509,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = i * 2; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2 + 1; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5515,7 +5523,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = i * 2 + 1; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5529,7 +5537,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {4 5 6 7 8 9 10 11}. */ for (i = 0; i < nelt; i++) sel[i] = nelt / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5544,7 +5552,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = i; for (i = nelt / 2; i < nelt; i++) sel[i] = nelt + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5607,7 +5615,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = 3 * k + (l % 3); k++; } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5621,7 +5629,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {6 7 8 9 10 11 12 13}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5634,7 +5642,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {5 6 7 8 9 10 11 12}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + 1 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5647,7 +5655,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {3 4 5 6 7 8 9 10}. */ for (i = 0; i < nelt; i++) sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5660,7 +5668,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {5 6 7 8 9 10 11 12}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 1341d66..b114ff0 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -1300,13 +1300,13 @@ lower_vec_perm (gimple_stmt_iterator *gsi) if (TREE_CODE (mask) == VECTOR_CST) { - unsigned char *sel_int = XALLOCAVEC (unsigned char, elements); + auto_vec_perm_indices sel_int (elements); for (i = 0; i < elements; ++i) - sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) - & (2 * elements - 1)); + sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) + & (2 * elements - 1)); - if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int)) + if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int)) { gimple_assign_set_rhs3 (stmt, mask); update_stmt (stmt); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 3b4a71e..8135219 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3698,15 +3698,15 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, } /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET - vector elements (not bits) for a vector of mode MODE. */ + vector elements (not bits) for a vector with NELT elements. */ static void -calc_vec_perm_mask_for_shift (machine_mode mode, unsigned int offset, - unsigned char *sel) +calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, + vec_perm_indices *sel) { - unsigned int i, nelt = GET_MODE_NUNITS (mode); + unsigned int i; for (i = 0; i < nelt; i++) - sel[i] = (i + offset) & (2*nelt - 1); + sel->quick_push ((i + offset) & (2 * nelt - 1)); } /* Checks whether the target supports whole-vector shifts for vectors of mode @@ -3722,12 +3722,13 @@ have_whole_vector_shift (machine_mode mode) return false; unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); for (i = nelt/2; i >= 1; i/=2) { - calc_vec_perm_mask_for_shift (mode, i, sel); - if (!can_vec_perm_p (mode, false, sel)) + sel.truncate (0); + calc_vec_perm_mask_for_shift (i, nelt, &sel); + if (!can_vec_perm_p (mode, false, &sel)) return false; } return true; @@ -5059,7 +5060,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, if (reduce_with_shift && !slp_reduc) { int nelements = vec_size_in_bits / element_bitsize; - unsigned char *sel = XALLOCAVEC (unsigned char, nelements); + auto_vec_perm_indices sel (nelements); int elt_offset; @@ -5083,8 +5084,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, elt_offset >= 1; elt_offset /= 2) { - calc_vec_perm_mask_for_shift (mode, elt_offset, sel); - tree mask = vect_gen_perm_mask_any (vectype, sel); + sel.truncate (0); + calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); + tree mask = vect_gen_perm_mask_any (vectype, sel); epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, new_temp, zero_vec, mask); new_name = make_ssa_name (vec_dest, epilog_stmt); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 32ca6af..32174fe 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -873,15 +873,16 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (alt_stmt_code != ERROR_MARK && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) { - unsigned char *sel - = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype)); - for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i) + unsigned int count = TYPE_VECTOR_SUBPARTS (vectype); + auto_vec_perm_indices sel (count); + for (i = 0; i < count; ++i) { - sel[i] = i; + unsigned int elt = i; if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) - sel[i] += TYPE_VECTOR_SUBPARTS (vectype); + elt += count; + sel.quick_push (elt); } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { for (i = 0; i < group_size; ++i) if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) @@ -3486,7 +3487,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, tree vectype = STMT_VINFO_VECTYPE (stmt_info); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); int mask_element; - unsigned char *mask; machine_mode mode; if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) @@ -3502,7 +3502,8 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); mask_type = get_vectype_for_scalar_type (mask_element_type); nunits = TYPE_VECTOR_SUBPARTS (vectype); - mask = XALLOCAVEC (unsigned char, nunits); + auto_vec_perm_indices mask (nunits); + mask.quick_grow (nunits); /* Initialize the vect stmts of NODE to properly insert the generated stmts later. */ @@ -3577,7 +3578,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, if (index == nunits) { if (! noop_p - && ! can_vec_perm_p (mode, false, mask)) + && ! can_vec_perm_p (mode, false, &mask)) { if (dump_enabled_p ()) { @@ -3730,15 +3731,15 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, enum tree_code code0 = gimple_assign_rhs_code (stmt); enum tree_code ocode = ERROR_MARK; gimple *ostmt; - unsigned char *mask = XALLOCAVEC (unsigned char, group_size); + auto_vec_perm_indices mask (group_size); FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt) if (gimple_assign_rhs_code (ostmt) != code0) { - mask[i] = 1; + mask.quick_push (1); ocode = gimple_assign_rhs_code (ostmt); } else - mask[i] = 0; + mask.quick_push (0); if (ocode != ERROR_MARK) { vec<gimple *> v0; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index b5f706c..0cee0d4 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1706,15 +1706,14 @@ static tree perm_mask_for_reverse (tree vectype) { int i, nunits; - unsigned char *sel; nunits = TYPE_VECTOR_SUBPARTS (vectype); - sel = XALLOCAVEC (unsigned char, nunits); + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = nunits - 1 - i; + sel.quick_push (nunits - 1 - i); - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) return NULL_TREE; return vect_gen_perm_mask_checked (vectype, sel); } @@ -2171,19 +2170,20 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, modifier = NONE; else if (nunits == gather_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (gather_off_nunits); for (i = 0; i < gather_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); } else if (nunits == gather_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); + sel.quick_grow (nunits); for (i = 0; i < nunits; ++i) sel[i] = i < gather_off_nunits ? i : i + nunits - gather_off_nunits; @@ -2481,14 +2481,14 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, return false; unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype); - unsigned char *elts = XALLOCAVEC (unsigned char, num_bytes); - unsigned char *elt = elts; unsigned word_bytes = num_bytes / nunits; + + auto_vec_perm_indices elts (num_bytes); for (unsigned i = 0; i < nunits; ++i) for (unsigned j = 0; j < word_bytes; ++j) - *elt++ = (i + 1) * word_bytes - j - 1; + elts.quick_push ((i + 1) * word_bytes - j - 1); - if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts)) + if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts)) return false; if (! vec_stmt) @@ -5803,22 +5803,22 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, modifier = NONE; else if (nunits == (unsigned int) scatter_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (scatter_off_nunits); for (i = 0; i < (unsigned int) scatter_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); gcc_assert (perm_mask != NULL_TREE); } else if (nunits == (unsigned int) scatter_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); for (i = 0; i < (unsigned int) nunits; ++i) - sel[i] = i | scatter_off_nunits; + sel.quick_push (i | scatter_off_nunits); perm_mask = vect_gen_perm_mask_checked (vectype, sel); gcc_assert (perm_mask != NULL_TREE); @@ -6503,19 +6503,19 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, vect_gen_perm_mask_checked. */ tree -vect_gen_perm_mask_any (tree vectype, const unsigned char *sel) +vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel) { tree mask_elt_type, mask_type, mask_vec; - int i, nunits; - nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned int nunits = sel.length (); + gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype)); mask_elt_type = lang_hooks.types.type_for_mode (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); mask_type = get_vectype_for_scalar_type (mask_elt_type); auto_vec<tree, 32> mask_elts (nunits); - for (i = 0; i < nunits; ++i) + for (unsigned int i = 0; i < nunits; ++i) mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i])); mask_vec = build_vector (mask_type, mask_elts); @@ -6526,9 +6526,9 @@ vect_gen_perm_mask_any (tree vectype, const unsigned char *sel) i.e. that the target supports the pattern _for arbitrary input vectors_. */ tree -vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel) +vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel) { - gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel)); + gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel)); return vect_gen_perm_mask_any (vectype, sel); } @@ -6841,22 +6841,22 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, modifier = NONE; else if (nunits == gather_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (gather_off_nunits); for (i = 0; i < gather_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); } else if (nunits == gather_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = i < gather_off_nunits - ? i : i + nunits - gather_off_nunits; + sel.quick_push (i < gather_off_nunits + ? i : i + nunits - gather_off_nunits); perm_mask = vect_gen_perm_mask_checked (vectype, sel); ncopies *= 2; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4ee3c3f..7ed0078 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1151,8 +1151,8 @@ extern void vect_get_load_cost (struct data_reference *, int, bool, extern void vect_get_store_cost (struct data_reference *, int, unsigned int *, stmt_vector_for_cost *); extern bool vect_supportable_shift (enum tree_code, tree); -extern tree vect_gen_perm_mask_any (tree, const unsigned char *); -extern tree vect_gen_perm_mask_checked (tree, const unsigned char *); +extern tree vect_gen_perm_mask_any (tree, vec_perm_indices); +extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices); extern void optimize_mask_stores (struct loop*); /* In tree-vect-data-refs.c. */ |