-rw-r--r--	gcc/tree-vect-data-refs.cc	180
-rw-r--r--	gcc/tree-vect-stmts.cc	71
-rw-r--r--	gcc/tree-vectorizer.h	11
3 files changed, 209 insertions(+), 53 deletions(-)
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index fb2450a..e8cfb88 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4431,6 +4431,7 @@ struct gather_scatter_config
{
internal_fn ifn;
tree offset_vectype;
+ int scale;
vec<int> elsvals;
};
@@ -4523,38 +4524,62 @@ vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
if (!offset_vectype)
continue;
- vec<int> elsvals = vNULL;
+ /* Try multiple scale values. Start with an exact match, then try
+ smaller common scales that a target might support. */
+ int scales_to_try[] = {scale, 1, 2, 4, 8};
- /* If we haven't determined which IFN is supported yet, try all three
- to find which one the target supports. */
- if (ifn == IFN_LAST)
+ for (unsigned int j = 0;
+ j < sizeof (scales_to_try) / sizeof (*scales_to_try);
+ j++)
{
- ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
- vectype, memory_type,
- offset_vectype, scale, &elsvals);
- if (ifn != IFN_LAST)
+ int try_scale = scales_to_try[j];
+
+ /* Skip scales >= requested scale (except for exact match). */
+ if (j > 0 && try_scale >= scale)
+ continue;
+
+ /* Skip if requested scale is not a multiple of this scale. */
+ if (j > 0 && scale % try_scale != 0)
+ continue;
+
+ vec<int> elsvals = vNULL;
+
+ /* If we haven't determined which IFN is supported yet, try all three
+ to find which one the target supports. */
+ if (ifn == IFN_LAST)
{
- /* Found which IFN is supported. Save this configuration. */
- gather_scatter_config config;
- config.ifn = ifn;
- config.offset_vectype = offset_vectype;
- config.elsvals = elsvals;
- configs.safe_push (config);
+ ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
+ vectype, memory_type,
+ offset_vectype, try_scale,
+ &elsvals);
+ if (ifn != IFN_LAST)
+ {
+ /* Found which IFN is supported. Save this configuration. */
+ gather_scatter_config config;
+ config.ifn = ifn;
+ config.offset_vectype = offset_vectype;
+ config.scale = try_scale;
+ config.elsvals = elsvals;
+ configs.safe_push (config);
+ }
}
- }
- else
- {
- /* We already know which IFN is supported, just check if this
- offset type works with it. */
- if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
- offset_vectype, scale,
- &elsvals))
+ else
{
- gather_scatter_config config;
- config.ifn = ifn;
- config.offset_vectype = offset_vectype;
- config.elsvals = elsvals;
- configs.safe_push (config);
+ /* We already know which IFN is supported, just check if this
+ offset type and scale work with it. */
+ if (internal_gather_scatter_fn_supported_p (ifn, vectype,
+ memory_type,
+ offset_vectype,
+ try_scale,
+ &elsvals))
+ {
+ gather_scatter_config config;
+ config.ifn = ifn;
+ config.offset_vectype = offset_vectype;
+ config.scale = try_scale;
+ config.elsvals = elsvals;
+ configs.safe_push (config);
+ }
}
}
}
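To illustrate the candidate filtering above: for a requested scale of 8 the
loop hands 8, 1, 2 and 4 to the support check, while for a requested scale
of 3 only 3 and 1 survive, because fallback scales must be strictly smaller
than the requested scale and divide it evenly. A minimal standalone C
sketch of the filter (illustrative only, mirroring the loop above):

#include <stdio.h>

/* Print the scales the candidate loop would try for SCALE.  */
static void
print_candidates (int scale)
{
  int scales_to_try[] = {scale, 1, 2, 4, 8};
  for (unsigned int j = 0;
       j < sizeof (scales_to_try) / sizeof (*scales_to_try); j++)
    {
      int try_scale = scales_to_try[j];
      /* Fallbacks must be strictly smaller than the requested scale
	 and divide it, so the compensation factor is an integer.  */
      if (j > 0 && (try_scale >= scale || scale % try_scale != 0))
	continue;
      printf ("%d ", try_scale);
    }
  printf ("\n");
}

int
main (void)
{
  print_candidates (8);	/* Prints: 8 1 2 4 */
  print_candidates (3);	/* Prints: 3 1 */
  return 0;
}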
@@ -4570,6 +4595,10 @@ vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
base address. If OFFSET_TYPE is scalar the function chooses an
appropriate vector type for it. SCALE is the amount by which the
offset should be multiplied *after* it has been converted to address width.
+ If the target does not support the requested SCALE, SUPPORTED_SCALE
+ will contain the scale that is actually supported, which is always
+ smaller and requires the offset to be multiplied by
+ SCALE / SUPPORTED_SCALE. Otherwise SUPPORTED_SCALE is 0.
Return true if the function is supported, storing the function id in
*IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
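A hypothetical call site following the updated interface (VINFO, VECTYPE,
MEMORY_TYPE and OFFSET_TYPE assumed to be in scope; a sketch only, not
part of the patch):

  int supported_scale;
  internal_fn ifn;
  tree offset_vectype, supported_offset_vectype;
  if (vect_gather_scatter_fn_p (vinfo, /*read_p=*/true, /*masked_p=*/false,
				vectype, memory_type, offset_type,
				/*scale=*/8, &supported_scale, &ifn,
				&offset_vectype, &supported_offset_vectype)
      && supported_scale != 0)
    /* The target only handles SUPPORTED_SCALE; the offsets must be
       multiplied by 8 / supported_scale before the gather/scatter
       is emitted.  */
    gcc_assert (8 % supported_scale == 0);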
@@ -4582,12 +4611,14 @@ vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree vectype, tree memory_type, tree offset_type,
- int scale, internal_fn *ifn_out,
+ int scale, int *supported_scale,
+ internal_fn *ifn_out,
tree *offset_vectype_out,
tree *supported_offset_vectype,
vec<int> *elsvals)
{
*supported_offset_vectype = NULL_TREE;
+ *supported_scale = 0;
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
unsigned int element_bits = vector_element_bits (vectype);
if (element_bits != memory_bits)
@@ -4609,11 +4640,19 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
if (configs.is_empty ())
return false;
- /* First, try to find a configuration that matches our offset type
- (no conversion needed). */
+ /* Selection priority:
+ 1 - Exact scale match + offset type match
+ 2 - Exact scale match + sign-swapped offset
+ 3 - Smaller scale + offset type match
+ 4 - Smaller scale + sign-swapped offset
+ Within each category, prefer smaller offset types. */
+
+ /* First pass: exact scale match with no conversion. */
for (unsigned int i = 0; i < configs.length (); i++)
{
- if (TYPE_SIGN (configs[i].offset_vectype) == TYPE_SIGN (offset_vectype))
+ if (configs[i].scale == scale
+ && TYPE_SIGN (configs[i].offset_vectype)
+ == TYPE_SIGN (offset_vectype))
{
*ifn_out = configs[i].ifn;
*offset_vectype_out = configs[i].offset_vectype;
@@ -4623,19 +4662,77 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
}
}
- /* No direct match. This means we try to find a sign-swapped offset
- vectype. */
+ /* No direct match. This means we try to find one of the following:
+ - a sign-swapped offset vectype,
+ - a smaller scale and a 2x larger offset type, or
+ - a smaller scale and a larger sign-swapped offset vectype. */
unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
unsigned int needed_precision
= TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+ /* Second pass: exact scale match with a sign-swapped offset
+ vectype. */
enum tree_code tmp;
for (unsigned int i = 0; i < configs.length (); i++)
{
unsigned int precision
= TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
- if (precision >= needed_precision
+ if (configs[i].scale == scale
+ && precision >= needed_precision
+ && (supportable_convert_operation (CONVERT_EXPR,
+ configs[i].offset_vectype,
+ offset_vectype, &tmp)
+ || (needed_precision == offset_precision
+ && tree_nop_conversion_p (configs[i].offset_vectype,
+ offset_vectype))))
+ {
+ *ifn_out = configs[i].ifn;
+ *offset_vectype_out = offset_vectype;
+ *supported_offset_vectype = configs[i].offset_vectype;
+ if (elsvals)
+ *elsvals = configs[i].elsvals;
+ return true;
+ }
+ }
+
+ /* Third pass: Try a smaller scale with the same signedness. */
+ needed_precision = offset_precision * 2;
+ needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+ for (unsigned int i = 0; i < configs.length (); i++)
+ {
+ unsigned int precision
+ = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+ if (configs[i].scale < scale
+ && precision >= needed_precision
+ && (supportable_convert_operation (CONVERT_EXPR,
+ configs[i].offset_vectype,
+ offset_vectype, &tmp)
+ || (needed_precision == offset_precision
+ && tree_nop_conversion_p (configs[i].offset_vectype,
+ offset_vectype))))
+ {
+ *ifn_out = configs[i].ifn;
+ *offset_vectype_out = configs[i].offset_vectype;
+ *supported_scale = configs[i].scale;
+ if (elsvals)
+ *elsvals = configs[i].elsvals;
+ return true;
+ }
+ }
+
+ /* Fourth pass: Try a smaller scale and a sign-swapped offset vectype. */
+ needed_precision
+ = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
+ needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
+
+ for (unsigned int i = 0; i < configs.length (); i++)
+ {
+ unsigned int precision
+ = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
+ if (configs[i].scale < scale
+ && precision >= needed_precision
&& (supportable_convert_operation (CONVERT_EXPR,
configs[i].offset_vectype,
offset_vectype, &tmp)
@@ -4646,6 +4743,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
*ifn_out = configs[i].ifn;
*offset_vectype_out = offset_vectype;
*supported_offset_vectype = configs[i].offset_vectype;
+ *supported_scale = configs[i].scale;
if (elsvals)
*elsvals = configs[i].elsvals;
return true;
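The doubling of NEEDED_PRECISION in the smaller-scale passes presumably
guards against the compensating multiplication overflowing the original
offset precision: the offset is later multiplied by
SCALE / SUPPORTED_SCALE, so it must be carried in a wider type. A
standalone C illustration of the failure mode this avoids (assumed
rationale, illustrative values):

#include <inttypes.h>
#include <stdio.h>

int
main (void)
{
  uint32_t off = 0x90000000u;	       /* Large element offset.  */
  uint32_t narrow = off * 2;	       /* Wraps to 0x20000000.  */
  uint64_t wide = (uint64_t) off * 2;  /* 0x120000000, exact.  */
  printf ("narrow %#" PRIx32 " wide %#" PRIx64 "\n", narrow, wide);
  return 0;
}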
@@ -4805,6 +4903,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
base = fold_convert (sizetype, base);
base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+ int tmp_scale;
tree tmp_offset_vectype;
/* OFF at this point may be either a SSA_NAME or some tree expression
@@ -4878,14 +4977,16 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
signed_char_type_node,
- new_scale, &ifn,
+ new_scale, &tmp_scale,
+ &ifn,
&offset_vectype,
&tmp_offset_vectype,
elsvals)
&& !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
unsigned_char_type_node,
- new_scale, &ifn,
+ new_scale, &tmp_scale,
+ &ifn,
&offset_vectype,
&tmp_offset_vectype,
elsvals))
@@ -4910,7 +5011,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
&& !POINTER_TYPE_P (TREE_TYPE (off))
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
- TREE_TYPE (off), scale, &ifn,
+ TREE_TYPE (off),
+ scale, &tmp_scale,
+ &ifn,
&offset_vectype,
&tmp_offset_vectype,
elsvals))
@@ -4966,7 +5069,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
if (use_ifn_p)
{
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offtype, scale,
+ vectype, memory_type, offtype,
+ scale, &tmp_scale,
&ifn, &offset_vectype,
&tmp_offset_vectype,
elsvals))
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index da093d5..2054f2a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1512,6 +1512,9 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
we chose a different one use this instead. */
if (ls->supported_offset_vectype)
off_vectype = ls->supported_offset_vectype;
+ /* Same for scale. */
+ if (ls->supported_scale)
+ scale = ls->supported_scale;
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
memory_type,
@@ -1706,8 +1709,10 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
no narrower than OFFSET_TYPE. */
tree memory_type = TREE_TYPE (DR_REF (dr));
tree tmp_offset_vectype;
+ int tmp_scale;
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
- vectype, memory_type, offset_type, scale,
+ vectype, memory_type, offset_type,
+ scale, &tmp_scale,
&gs_info->ifn, &gs_info->offset_vectype,
&tmp_offset_vectype, elsvals)
|| gs_info->ifn == IFN_LAST)
@@ -1789,9 +1794,10 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
not available we still have a strided load/store. */
bool ok = false;
tree tmp_vectype;
+ int tmp_scale;
if (vect_gather_scatter_fn_p
(loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
- TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
+ TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn,
&offset_vectype, &tmp_vectype, elsvals))
ok = true;
else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
@@ -2091,6 +2097,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
bool *slp_perm = &ls->slp_perm;
unsigned *n_perms = &ls->n_perms;
tree *supported_offset_vectype = &ls->supported_offset_vectype;
+ int *supported_scale = &ls->supported_scale;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2164,7 +2171,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
tree tem;
if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
masked_p, vectype, memory_type,
- offset_vectype, scale,
+ offset_vectype, scale, supported_scale,
&ls->gs.ifn, &tem,
supported_offset_vectype, elsvals))
{
@@ -2179,6 +2186,10 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
dump_printf_loc (MSG_NOTE, vect_location,
" target supports offset type %T.\n",
*supported_offset_vectype);
+ if (*supported_scale)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " target supports offset scale %d.\n",
+ *supported_scale);
}
*memory_access_type = VMAT_GATHER_SCATTER_IFN;
}
@@ -2455,7 +2466,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
gcc_assert (vect_gather_scatter_fn_p
(loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
gs_info.memory_type, TREE_TYPE (gs_info.offset),
- gs_info.scale, &gs_info.ifn,
+ gs_info.scale, supported_scale, &gs_info.ifn,
&tmp, supported_offset_vectype, elsvals));
SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
@@ -8850,6 +8861,10 @@ vectorizable_store (vec_info *vinfo,
inside_cost
+= record_stmt_cost (cost_vec, 1, vector_stmt,
slp_node, 0, vect_body);
+ if (ls.supported_scale)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
@@ -8864,12 +8879,26 @@ vectorizable_store (vec_info *vinfo,
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
- /* Perform the offset conversion if necessary. */
- if (!strided && ls.supported_offset_vectype)
+ /* Perform the offset conversion and scaling if necessary. */
+ if (!strided
+ && (ls.supported_offset_vectype || ls.supported_scale))
{
gimple_seq stmts = NULL;
- vec_offset = gimple_convert
- (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_offset_vectype)
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_scale)
+ {
+ tree mult_cst = build_int_cst
+ (TREE_TYPE (TREE_TYPE (vec_offset)),
+ SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+ tree mult = build_vector_from_val
+ (TREE_TYPE (vec_offset), mult_cst);
+ vec_offset = gimple_build
+ (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+ vec_offset, mult);
+ scale = size_int (ls.supported_scale);
+ }
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}
@@ -10691,6 +10720,10 @@ vectorizable_load (vec_info *vinfo,
inside_cost
+= record_stmt_cost (cost_vec, 1, vector_stmt,
slp_node, 0, vect_body);
+ if (ls.supported_scale)
+ inside_cost
+ += record_stmt_cost (cost_vec, 1, vector_stmt,
+ slp_node, 0, vect_body);
unsigned int cnunits = vect_nunits_for_cost (vectype);
inside_cost
@@ -10704,12 +10737,26 @@ vectorizable_load (vec_info *vinfo,
tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
- /* Perform the offset conversion if necessary. */
- if (!strided && ls.supported_offset_vectype)
+ /* Perform the offset conversion and scaling if necessary. */
+ if (!strided
+ && (ls.supported_offset_vectype || ls.supported_scale))
{
gimple_seq stmts = NULL;
- vec_offset = gimple_convert
- (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_offset_vectype)
+ vec_offset = gimple_convert
+ (&stmts, ls.supported_offset_vectype, vec_offset);
+ if (ls.supported_scale)
+ {
+ tree mult_cst = build_int_cst
+ (TREE_TYPE (TREE_TYPE (vec_offset)),
+ SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+ tree mult = build_vector_from_val
+ (TREE_TYPE (vec_offset), mult_cst);
+ vec_offset = gimple_build
+ (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+ vec_offset, mult);
+ scale = size_int (ls.supported_scale);
+ }
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}
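Both the store and the load path rely on the same identity: when
SUPPORTED_SCALE divides the requested scale,
base + offset * scale == base + (offset * (scale / supported_scale))
* supported_scale, so multiplying the offset vector by the quotient and
gathering with the smaller scale addresses the same memory. A standalone
check with example values (illustrative only):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int64_t base = 0x10000, off = 7;
  int scale = 8, supported_scale = 4;	 /* Requested vs. supported.  */
  int factor = scale / supported_scale;	 /* The emitted MULT_EXPR constant.  */
  assert (base + off * scale
	  == base + (off * factor) * supported_scale);
  return 0;
}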
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b940a76..b7f3297 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -290,9 +290,14 @@ struct vect_load_store_data : vect_data {
tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
tree ls_type; // VMAT_GATHER_SCATTER_IFN
/* This is set to a supported offset vector type if we don't support the
- originally requested offset type. In that case there will be an
- additional offset conversion before the gather/scatter. */
+ originally requested offset type, otherwise NULL_TREE.
+ If non-NULL there will be an additional offset conversion before
+ the gather/scatter. */
tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
+ /* Similar for scale. Only nonzero if we don't support the requested
+ scale. Then the offset vector needs to be multiplied by the
+ requested scale divided by this one before the gather/scatter. */
+ int supported_scale; // VMAT_GATHER_SCATTER_IFN
auto_vec<int> elsvals;
/* True if the load requires a load permutation. */
bool slp_perm; // SLP_TREE_LOAD_PERMUTATION
@@ -2592,7 +2597,7 @@ extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
- tree, int, internal_fn *, tree *,
+ tree, int, int *, internal_fn *, tree *,
tree *, vec<int> * = nullptr);
extern bool vect_check_gather_scatter (stmt_vec_info, tree,
loop_vec_info, gather_scatter_info *,