aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog38
-rw-r--r--gcc/fold-const.c33
-rw-r--r--gcc/optabs-query.c19
-rw-r--r--gcc/optabs-query.h3
-rw-r--r--gcc/target.h8
-rw-r--r--gcc/tree-ssa-forwprop.c11
-rw-r--r--gcc/tree-vect-data-refs.c62
-rw-r--r--gcc/tree-vect-generic.c8
-rw-r--r--gcc/tree-vect-loop.c24
-rw-r--r--gcc/tree-vect-slp.c25
-rw-r--r--gcc/tree-vect-stmts.c52
-rw-r--r--gcc/tree-vectorizer.h4
12 files changed, 176 insertions, 111 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b52eced..a6c6fcc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -2,6 +2,44 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * target.h (vec_perm_indices): New typedef.
+ (auto_vec_perm_indices): Likewise.
+ * optabs-query.h: Include target.h.
+ (can_vec_perm_p): Take a vec_perm_indices *.
+ * optabs-query.c (can_vec_perm_p): Likewise.
+ (can_mult_highpart_p): Update accordingly. Use auto_vec_perm_indices.
+ * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise.
+ * tree-vect-generic.c (lower_vec_perm): Likewise.
+ * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise.
+ (vect_grouped_load_supported): Likewise.
+ (vect_shift_permute_load_chain): Likewise.
+ (vect_permute_store_chain): Use auto_vec_perm_indices.
+ (vect_permute_load_chain): Likewise.
+ * fold-const.c (fold_vec_perm): Take vec_perm_indices.
+ (fold_ternary_loc): Update accordingly. Use auto_vec_perm_indices.
+ Update uses of can_vec_perm_p.
+ * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Replace the
+ mode with a number of elements. Take a vec_perm_indices *.
+ (vect_create_epilog_for_reduction): Update accordingly.
+ Use auto_vec_perm_indices.
+ (have_whole_vector_shift): Likewise. Update call to can_vec_perm_p.
+ * tree-vect-slp.c (vect_build_slp_tree_1): Likewise.
+ (vect_transform_slp_perm_load): Likewise.
+ (vect_schedule_slp_instance): Use auto_vec_perm_indices.
+ * tree-vectorizer.h (vect_gen_perm_mask_any): Take a vec_perm_indices.
+ (vect_gen_perm_mask_checked): Likewise.
+ * tree-vect-stmts.c (vect_gen_perm_mask_any): Take a vec_perm_indices.
+ (vect_gen_perm_mask_checked): Likewise.
+ (vectorizable_mask_load_store): Use auto_vec_perm_indices.
+ (vectorizable_store): Likewise.
+ (vectorizable_load): Likewise.
+ (perm_mask_for_reverse): Likewise. Update call to can_vec_perm_p.
+ (vectorizable_bswap): Likewise.
+
+2017-09-14 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* tree.h (build_vector): Take a vec<tree> instead of a tree *.
* tree.c (build_vector): Likewise.
(build_vector_from_ctor): Update accordingly.
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 013081d..fa9d1bb 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -8786,12 +8786,14 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts)
NULL_TREE otherwise. */
static tree
-fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel)
+fold_vec_perm (tree type, tree arg0, tree arg1, vec_perm_indices sel)
{
- unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+ unsigned int i;
bool need_ctor = false;
- gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
+ unsigned int nelts = sel.length ();
+ gcc_assert (TYPE_VECTOR_SUBPARTS (type) == nelts
+ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
&& TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts);
if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type)
|| TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type))
@@ -11312,15 +11314,15 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
|| TREE_CODE (arg2) == CONSTRUCTOR))
{
unsigned int nelts = VECTOR_CST_NELTS (arg0), i;
- unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type));
+ auto_vec_perm_indices sel (nelts);
for (i = 0; i < nelts; i++)
{
tree val = VECTOR_CST_ELT (arg0, i);
if (integer_all_onesp (val))
- sel[i] = i;
+ sel.quick_push (i);
else if (integer_zerop (val))
- sel[i] = nelts + i;
+ sel.quick_push (nelts + i);
else /* Currently unreachable. */
return NULL_TREE;
}
@@ -11643,8 +11645,6 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
if (TREE_CODE (arg2) == VECTOR_CST)
{
unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2;
- unsigned char *sel = XALLOCAVEC (unsigned char, 2 * nelts);
- unsigned char *sel2 = sel + nelts;
bool need_mask_canon = false;
bool need_mask_canon2 = false;
bool all_in_vec0 = true;
@@ -11656,6 +11656,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
mask2 = 2 * nelts - 1;
mask = single_arg ? (nelts - 1) : mask2;
gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type));
+ auto_vec_perm_indices sel (nelts);
+ auto_vec_perm_indices sel2 (nelts);
for (i = 0; i < nelts; i++)
{
tree val = VECTOR_CST_ELT (arg2, i);
@@ -11667,16 +11669,19 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
wide_int t = val;
need_mask_canon |= wi::gtu_p (t, mask);
need_mask_canon2 |= wi::gtu_p (t, mask2);
- sel[i] = t.to_uhwi () & mask;
- sel2[i] = t.to_uhwi () & mask2;
+ unsigned int elt = t.to_uhwi () & mask;
+ unsigned int elt2 = t.to_uhwi () & mask2;
- if (sel[i] < nelts)
+ if (elt < nelts)
all_in_vec1 = false;
else
all_in_vec0 = false;
- if ((sel[i] & (nelts-1)) != i)
+ if ((elt & (nelts - 1)) != i)
maybe_identity = false;
+
+ sel.quick_push (elt);
+ sel2.quick_push (elt2);
}
if (maybe_identity)
@@ -11714,8 +11719,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
argument permutation while still allowing an equivalent
2-argument version. */
if (need_mask_canon && arg2 == op2
- && !can_vec_perm_p (TYPE_MODE (type), false, sel)
- && can_vec_perm_p (TYPE_MODE (type), false, sel2))
+ && !can_vec_perm_p (TYPE_MODE (type), false, &sel)
+ && can_vec_perm_p (TYPE_MODE (type), false, &sel2))
{
need_mask_canon = need_mask_canon2;
sel = sel2;
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 81b1bd9..ced6f57 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -353,8 +353,7 @@ can_conditionally_move_p (machine_mode mode)
zeroes; this case is not dealt with here. */
bool
-can_vec_perm_p (machine_mode mode, bool variable,
- const unsigned char *sel)
+can_vec_perm_p (machine_mode mode, bool variable, vec_perm_indices *sel)
{
machine_mode qimode;
@@ -368,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool variable,
if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
&& (sel == NULL
|| targetm.vectorize.vec_perm_const_ok == NULL
- || targetm.vectorize.vec_perm_const_ok (mode, sel)))
+ || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0])))
return true;
}
@@ -460,7 +459,6 @@ int
can_mult_highpart_p (machine_mode mode, bool uns_p)
{
optab op;
- unsigned char *sel;
unsigned i, nunits;
op = uns_p ? umul_highpart_optab : smul_highpart_optab;
@@ -472,7 +470,6 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
return 0;
nunits = GET_MODE_NUNITS (mode);
- sel = XALLOCAVEC (unsigned char, nunits);
op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
@@ -480,9 +477,12 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
+ auto_vec_perm_indices sel (nunits);
for (i = 0; i < nunits; ++i)
- sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
- if (can_vec_perm_p (mode, false, sel))
+ sel.quick_push (!BYTES_BIG_ENDIAN
+ + (i & ~1)
+ + ((i & 1) ? nunits : 0));
+ if (can_vec_perm_p (mode, false, &sel))
return 2;
}
}
@@ -493,9 +493,10 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
+ auto_vec_perm_indices sel (nunits);
for (i = 0; i < nunits; ++i)
- sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
- if (can_vec_perm_p (mode, false, sel))
+ sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
+ if (can_vec_perm_p (mode, false, &sel))
return 3;
}
}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 1612fc8..9c2d574 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see
#define GCC_OPTABS_QUERY_H
#include "insn-opinit.h"
+#include "target.h"
/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
if the target does not have such an insn. */
@@ -165,7 +166,7 @@ enum insn_code can_extend_p (machine_mode, machine_mode, int);
enum insn_code can_float_p (machine_mode, machine_mode, int);
enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *);
bool can_conditionally_move_p (machine_mode mode);
-bool can_vec_perm_p (machine_mode, bool, const unsigned char *);
+bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *);
enum insn_code widening_optab_handler (optab, machine_mode, machine_mode);
/* Find a widening optab even if it doesn't widen as much as we want. */
#define find_widening_optab_handler(A,B,C,D) \
diff --git a/gcc/target.h b/gcc/target.h
index 393de40..64e1d68 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -191,6 +191,14 @@ enum vect_cost_model_location {
vect_epilogue = 2
};
+/* The type to use for vector permutes with a constant permute vector.
+ Each entry is an index into the concatenated input vectors. */
+typedef vec<unsigned char> vec_perm_indices;
+
+/* Same, but can be used to construct local permute vectors that are
+ automatically freed. */
+typedef auto_vec<unsigned char, 32> auto_vec_perm_indices;
+
/* The target structure. This holds all the backend hooks. */
#define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME;
#define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 82d940b..11511b4 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -1952,7 +1952,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
unsigned elem_size, nelts, i;
enum tree_code code, conv_code;
constructor_elt *elt;
- unsigned char *sel;
bool maybe_ident;
gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
@@ -1965,7 +1964,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
elem_type = TREE_TYPE (type);
elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
- sel = XALLOCAVEC (unsigned char, nelts);
+ auto_vec_perm_indices sel (nelts);
orig = NULL;
conv_code = ERROR_MARK;
maybe_ident = true;
@@ -2023,8 +2022,10 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
}
if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size)
return false;
- sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
- if (sel[i] != i) maybe_ident = false;
+ unsigned int elt = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size;
+ if (elt != i)
+ maybe_ident = false;
+ sel.quick_push (elt);
}
if (i < nelts)
return false;
@@ -2053,7 +2054,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
{
tree mask_type;
- if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (type), false, &sel))
return false;
mask_type
= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 070c707..0b3b968 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -4547,7 +4547,8 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
if (VECTOR_MODE_P (mode))
{
unsigned int i, nelt = GET_MODE_NUNITS (mode);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+ auto_vec_perm_indices sel (nelt);
+ sel.quick_grow (nelt);
if (count == 3)
{
@@ -4568,7 +4569,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
if (3 * i + nelt2 < nelt)
sel[3 * i + nelt2] = 0;
}
- if (!can_vec_perm_p (mode, false, sel))
+ if (!can_vec_perm_p (mode, false, &sel))
{
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
@@ -4585,7 +4586,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
if (3 * i + nelt2 < nelt)
sel[3 * i + nelt2] = nelt + j2++;
}
- if (!can_vec_perm_p (mode, false, sel))
+ if (!can_vec_perm_p (mode, false, &sel))
{
if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION,
@@ -4605,13 +4606,13 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
sel[i * 2] = i;
sel[i * 2 + 1] = i + nelt;
}
- if (can_vec_perm_p (mode, false, sel))
- {
- for (i = 0; i < nelt; i++)
- sel[i] += nelt / 2;
- if (can_vec_perm_p (mode, false, sel))
- return true;
- }
+ if (can_vec_perm_p (mode, false, &sel))
+ {
+ for (i = 0; i < nelt; i++)
+ sel[i] += nelt / 2;
+ if (can_vec_perm_p (mode, false, &sel))
+ return true;
+ }
}
}
@@ -4710,7 +4711,9 @@ vect_permute_store_chain (vec<tree> dr_chain,
tree perm3_mask_low, perm3_mask_high;
unsigned int i, n, log_length = exact_log2 (length);
unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+
+ auto_vec_perm_indices sel (nelt);
+ sel.quick_grow (nelt);
result_chain->quick_grow (length);
memcpy (result_chain->address (), dr_chain.address (),
@@ -5132,7 +5135,8 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
if (VECTOR_MODE_P (mode))
{
unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+ auto_vec_perm_indices sel (nelt);
+ sel.quick_grow (nelt);
if (count == 3)
{
@@ -5144,7 +5148,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
sel[i] = 3 * i + k;
else
sel[i] = 0;
- if (!can_vec_perm_p (mode, false, sel))
+ if (!can_vec_perm_p (mode, false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5157,7 +5161,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
sel[i] = i;
else
sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
- if (!can_vec_perm_p (mode, false, sel))
+ if (!can_vec_perm_p (mode, false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5174,11 +5178,11 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
gcc_assert (pow2p_hwi (count));
for (i = 0; i < nelt; i++)
sel[i] = i * 2;
- if (can_vec_perm_p (mode, false, sel))
+ if (can_vec_perm_p (mode, false, &sel))
{
for (i = 0; i < nelt; i++)
sel[i] = i * 2 + 1;
- if (can_vec_perm_p (mode, false, sel))
+ if (can_vec_perm_p (mode, false, &sel))
return true;
}
}
@@ -5292,7 +5296,9 @@ vect_permute_load_chain (vec<tree> dr_chain,
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
unsigned int i, j, log_length = exact_log2 (length);
unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+
+ auto_vec_perm_indices sel (nelt);
+ sel.quick_grow (nelt);
result_chain->quick_grow (length);
memcpy (result_chain->address (), dr_chain.address (),
@@ -5486,10 +5492,12 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
unsigned int i;
unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ auto_vec_perm_indices sel (nelt);
+ sel.quick_grow (nelt);
+
result_chain->quick_grow (length);
memcpy (result_chain->address (), dr_chain.address (),
length * sizeof (tree));
@@ -5501,7 +5509,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
sel[i] = i * 2;
for (i = 0; i < nelt / 2; ++i)
sel[nelt / 2 + i] = i * 2 + 1;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5515,7 +5523,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
sel[i] = i * 2 + 1;
for (i = 0; i < nelt / 2; ++i)
sel[nelt / 2 + i] = i * 2;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5529,7 +5537,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
For vector length 8 it is {4 5 6 7 8 9 10 11}. */
for (i = 0; i < nelt; i++)
sel[i] = nelt / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5544,7 +5552,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
sel[i] = i;
for (i = nelt / 2; i < nelt; i++)
sel[i] = nelt + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5607,7 +5615,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
sel[i] = 3 * k + (l % 3);
k++;
}
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5621,7 +5629,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
For vector length 8 it is {6 7 8 9 10 11 12 13}. */
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5634,7 +5642,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
For vector length 8 it is {5 6 7 8 9 10 11 12}. */
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + 1 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5647,7 +5655,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
For vector length 8 it is {3 4 5 6 7 8 9 10}. */
for (i = 0; i < nelt; i++)
sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5660,7 +5668,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
For vector length 8 it is {5 6 7 8 9 10 11 12}. */
for (i = 0; i < nelt; i++)
sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 1341d66..b114ff0 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -1300,13 +1300,13 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
if (TREE_CODE (mask) == VECTOR_CST)
{
- unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
+ auto_vec_perm_indices sel_int (elements);
for (i = 0; i < elements; ++i)
- sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
- & (2 * elements - 1));
+ sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
+ & (2 * elements - 1));
- if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
+ if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int))
{
gimple_assign_set_rhs3 (stmt, mask);
update_stmt (stmt);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 3b4a71e..8135219 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3698,15 +3698,15 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
}
/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
- vector elements (not bits) for a vector of mode MODE. */
+ vector elements (not bits) for a vector with NELT elements. */
static void
-calc_vec_perm_mask_for_shift (machine_mode mode, unsigned int offset,
- unsigned char *sel)
+calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
+ vec_perm_indices *sel)
{
- unsigned int i, nelt = GET_MODE_NUNITS (mode);
+ unsigned int i;
for (i = 0; i < nelt; i++)
- sel[i] = (i + offset) & (2*nelt - 1);
+ sel->quick_push ((i + offset) & (2 * nelt - 1));
}
/* Checks whether the target supports whole-vector shifts for vectors of mode
@@ -3722,12 +3722,13 @@ have_whole_vector_shift (machine_mode mode)
return false;
unsigned int i, nelt = GET_MODE_NUNITS (mode);
- unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
+ auto_vec_perm_indices sel (nelt);
for (i = nelt/2; i >= 1; i/=2)
{
- calc_vec_perm_mask_for_shift (mode, i, sel);
- if (!can_vec_perm_p (mode, false, sel))
+ sel.truncate (0);
+ calc_vec_perm_mask_for_shift (i, nelt, &sel);
+ if (!can_vec_perm_p (mode, false, &sel))
return false;
}
return true;
@@ -5059,7 +5060,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
if (reduce_with_shift && !slp_reduc)
{
int nelements = vec_size_in_bits / element_bitsize;
- unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
+ auto_vec_perm_indices sel (nelements);
int elt_offset;
@@ -5083,8 +5084,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
elt_offset >= 1;
elt_offset /= 2)
{
- calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
- tree mask = vect_gen_perm_mask_any (vectype, sel);
+ sel.truncate (0);
+ calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
+ tree mask = vect_gen_perm_mask_any (vectype, sel);
epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
new_temp, zero_vec, mask);
new_name = make_ssa_name (vec_dest, epilog_stmt);
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 32ca6af..32174fe 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -873,15 +873,16 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
if (alt_stmt_code != ERROR_MARK
&& TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
{
- unsigned char *sel
- = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
- for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
+ unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
+ auto_vec_perm_indices sel (count);
+ for (i = 0; i < count; ++i)
{
- sel[i] = i;
+ unsigned int elt = i;
if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
- sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
+ elt += count;
+ sel.quick_push (elt);
}
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
{
for (i = 0; i < group_size; ++i)
if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
@@ -3486,7 +3487,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
int mask_element;
- unsigned char *mask;
machine_mode mode;
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
@@ -3502,7 +3502,8 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
(int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
mask_type = get_vectype_for_scalar_type (mask_element_type);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- mask = XALLOCAVEC (unsigned char, nunits);
+ auto_vec_perm_indices mask (nunits);
+ mask.quick_grow (nunits);
/* Initialize the vect stmts of NODE to properly insert the generated
stmts later. */
@@ -3577,7 +3578,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
if (index == nunits)
{
if (! noop_p
- && ! can_vec_perm_p (mode, false, mask))
+ && ! can_vec_perm_p (mode, false, &mask))
{
if (dump_enabled_p ())
{
@@ -3730,15 +3731,15 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
enum tree_code code0 = gimple_assign_rhs_code (stmt);
enum tree_code ocode = ERROR_MARK;
gimple *ostmt;
- unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
+ auto_vec_perm_indices mask (group_size);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
if (gimple_assign_rhs_code (ostmt) != code0)
{
- mask[i] = 1;
+ mask.quick_push (1);
ocode = gimple_assign_rhs_code (ostmt);
}
else
- mask[i] = 0;
+ mask.quick_push (0);
if (ocode != ERROR_MARK)
{
vec<gimple *> v0;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b5f706c..0cee0d4 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1706,15 +1706,14 @@ static tree
perm_mask_for_reverse (tree vectype)
{
int i, nunits;
- unsigned char *sel;
nunits = TYPE_VECTOR_SUBPARTS (vectype);
- sel = XALLOCAVEC (unsigned char, nunits);
+ auto_vec_perm_indices sel (nunits);
for (i = 0; i < nunits; ++i)
- sel[i] = nunits - 1 - i;
+ sel.quick_push (nunits - 1 - i);
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
return NULL_TREE;
return vect_gen_perm_mask_checked (vectype, sel);
}
@@ -2171,19 +2170,20 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
modifier = NONE;
else if (nunits == gather_off_nunits / 2)
{
- unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
modifier = WIDEN;
+ auto_vec_perm_indices sel (gather_off_nunits);
for (i = 0; i < gather_off_nunits; ++i)
- sel[i] = i | nunits;
+ sel.quick_push (i | nunits);
perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
}
else if (nunits == gather_off_nunits * 2)
{
- unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
modifier = NARROW;
+ auto_vec_perm_indices sel (nunits);
+ sel.quick_grow (nunits);
for (i = 0; i < nunits; ++i)
sel[i] = i < gather_off_nunits
? i : i + nunits - gather_off_nunits;
@@ -2481,14 +2481,14 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
return false;
unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
- unsigned char *elts = XALLOCAVEC (unsigned char, num_bytes);
- unsigned char *elt = elts;
unsigned word_bytes = num_bytes / nunits;
+
+ auto_vec_perm_indices elts (num_bytes);
for (unsigned i = 0; i < nunits; ++i)
for (unsigned j = 0; j < word_bytes; ++j)
- *elt++ = (i + 1) * word_bytes - j - 1;
+ elts.quick_push ((i + 1) * word_bytes - j - 1);
- if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
+ if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
return false;
if (! vec_stmt)
@@ -5803,22 +5803,22 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
modifier = NONE;
else if (nunits == (unsigned int) scatter_off_nunits / 2)
{
- unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
modifier = WIDEN;
+ auto_vec_perm_indices sel (scatter_off_nunits);
for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
- sel[i] = i | nunits;
+ sel.quick_push (i | nunits);
perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
gcc_assert (perm_mask != NULL_TREE);
}
else if (nunits == (unsigned int) scatter_off_nunits * 2)
{
- unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
modifier = NARROW;
+ auto_vec_perm_indices sel (nunits);
for (i = 0; i < (unsigned int) nunits; ++i)
- sel[i] = i | scatter_off_nunits;
+ sel.quick_push (i | scatter_off_nunits);
perm_mask = vect_gen_perm_mask_checked (vectype, sel);
gcc_assert (perm_mask != NULL_TREE);
@@ -6503,19 +6503,19 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
vect_gen_perm_mask_checked. */
tree
-vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
+vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
{
tree mask_elt_type, mask_type, mask_vec;
- int i, nunits;
- nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned int nunits = sel.length ();
+ gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
mask_elt_type = lang_hooks.types.type_for_mode
(int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
mask_type = get_vectype_for_scalar_type (mask_elt_type);
auto_vec<tree, 32> mask_elts (nunits);
- for (i = 0; i < nunits; ++i)
+ for (unsigned int i = 0; i < nunits; ++i)
mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
mask_vec = build_vector (mask_type, mask_elts);
@@ -6526,9 +6526,9 @@ vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
i.e. that the target supports the pattern _for arbitrary input vectors_. */
tree
-vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
+vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
{
- gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
+ gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
return vect_gen_perm_mask_any (vectype, sel);
}
@@ -6841,22 +6841,22 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
modifier = NONE;
else if (nunits == gather_off_nunits / 2)
{
- unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
modifier = WIDEN;
+ auto_vec_perm_indices sel (gather_off_nunits);
for (i = 0; i < gather_off_nunits; ++i)
- sel[i] = i | nunits;
+ sel.quick_push (i | nunits);
perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
}
else if (nunits == gather_off_nunits * 2)
{
- unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
modifier = NARROW;
+ auto_vec_perm_indices sel (nunits);
for (i = 0; i < nunits; ++i)
- sel[i] = i < gather_off_nunits
- ? i : i + nunits - gather_off_nunits;
+ sel.quick_push (i < gather_off_nunits
+ ? i : i + nunits - gather_off_nunits);
perm_mask = vect_gen_perm_mask_checked (vectype, sel);
ncopies *= 2;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4ee3c3f..7ed0078 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1151,8 +1151,8 @@ extern void vect_get_load_cost (struct data_reference *, int, bool,
extern void vect_get_store_cost (struct data_reference *, int,
unsigned int *, stmt_vector_for_cost *);
extern bool vect_supportable_shift (enum tree_code, tree);
-extern tree vect_gen_perm_mask_any (tree, const unsigned char *);
-extern tree vect_gen_perm_mask_checked (tree, const unsigned char *);
+extern tree vect_gen_perm_mask_any (tree, vec_perm_indices);
+extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices);
extern void optimize_mask_stores (struct loop*);
/* In tree-vect-data-refs.c. */