Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r--	gcc/tree-vect-stmts.cc	252
1 file changed, 129 insertions(+), 123 deletions(-)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index aa2657a..7fe9996 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1422,7 +1422,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
int group_size,
vect_memory_access_type
memory_access_type,
- gather_scatter_info *gs_info,
+ const gather_scatter_info *gs_info,
tree scalar_mask,
vec<int> *elsvals = nullptr)
{
@@ -1676,7 +1676,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
get_object_alignment (DR_REF (dr)));
gs_info->element_type = TREE_TYPE (vectype);
gs_info->offset = fold_convert (offset_type, step);
- gs_info->offset_dt = vect_constant_def;
gs_info->scale = scale;
gs_info->memory_type = memory_type;
return true;
@@ -1703,19 +1702,32 @@ static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
loop_vec_info loop_vinfo, bool masked_p,
gather_scatter_info *gs_info,
- vec<int> *elsvals)
+ vec<int> *elsvals,
+ unsigned int group_size,
+ bool single_element_p)
{
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
|| gs_info->ifn == IFN_LAST)
- return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
- masked_p, gs_info, elsvals);
+ {
+ if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
+ masked_p, gs_info, elsvals))
+ return false;
+ }
+ else
+ {
+ tree old_offset_type = TREE_TYPE (gs_info->offset);
+ tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
- tree old_offset_type = TREE_TYPE (gs_info->offset);
- tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+ gcc_assert (TYPE_PRECISION (new_offset_type)
+ >= TYPE_PRECISION (old_offset_type));
+ gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ }
- gcc_assert (TYPE_PRECISION (new_offset_type)
- >= TYPE_PRECISION (old_offset_type));
- gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ if (!single_element_p
+ && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+ gs_info->scale,
+ group_size))
+ return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
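Note: the hunk above gates the strided-load-to-gather fallback for
multi-element groups behind a target hook. Below is a minimal sketch of a
target-side implementation; the hook name and the (mode, scale, group_size)
signature follow the call site in this patch, but the body and the
threshold are purely illustrative assumptions.

/* Illustrative only: prefer contiguous loads plus permutes for narrow
   groups, and resort to gather/scatter once the group is too wide to
   permute cheaply.  The threshold 4 is a made-up tuning value.  */
static bool
example_prefer_gather_scatter (machine_mode, int, unsigned int group_size)
{
  return group_size > 4;
}
#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER example_prefer_gather_scatter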
@@ -1977,7 +1989,49 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
separated by the stride, until we have a complete vector.
Fall back to scalar accesses if that isn't possible. */
*memory_access_type = VMAT_STRIDED_SLP;
- else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ *memory_access_type = VMAT_GATHER_SCATTER;
+ if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+ elsvals))
+ gcc_unreachable ();
+ slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
+ tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
+ gs_info->offset_vectype = offset_vectype;
+ /* When using internal functions, we rely on pattern recognition
+ to convert the type of the offset to the type that the target
+ requires, with the result being a call to an internal function.
+ If that failed for some reason (e.g. because another pattern
+ took priority), just handle cases in which the offset already
+ has the right type. */
+ if (GATHER_SCATTER_IFN_P (*gs_info)
+ && !is_gimple_call (stmt_info->stmt)
+ && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+ offset_vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%s offset requires a conversion\n",
+ vls_type == VLS_LOAD ? "gather" : "scatter");
+ return false;
+ }
+ else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+ {
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
+ || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported vector types for emulated "
+ "gather.\n");
+ return false;
+ }
+ }
+ }
+ else
{
int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
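Note: a worked instance of the emulated-gather test added above, with
assumed concrete types (V4DF data gathered via V8SI offsets); the modes
are illustrative, not from the patch.

/* Both lane counts are compile-time constants and 8 is a multiple of 4,
   so every data lane has an offset lane available; scalable (VLA)
   vectors fail the is_constant () tests and are rejected.  */
poly_uint64 off_lanes = 8, data_lanes = 4;
unsigned HOST_WIDE_INT factor;
gcc_checking_assert (constant_multiple_p (off_lanes, data_lanes, &factor)
		     && factor == 2);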
@@ -2221,64 +2275,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && single_element_p
&& SLP_TREE_LANES (slp_node) == 1
&& loop_vinfo
&& vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
- masked_p, gs_info, elsvals))
+ masked_p, gs_info, elsvals,
+ group_size, single_element_p))
*memory_access_type = VMAT_GATHER_SCATTER;
- else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- {
- tree offset;
- slp_tree offset_node;
- *memory_access_type = VMAT_GATHER_SCATTER;
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
- elsvals))
- gcc_unreachable ();
- /* When using internal functions, we rely on pattern recognition
- to convert the type of the offset to the type that the target
- requires, with the result being a call to an internal function.
- If that failed for some reason (e.g. because another pattern
- took priority), just handle cases in which the offset already
- has the right type. */
- else if (GATHER_SCATTER_IFN_P (*gs_info)
- && !is_gimple_call (stmt_info->stmt)
- && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
- TREE_TYPE (gs_info->offset_vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s offset requires a conversion\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node,
- &gs_info->offset_dt,
- &gs_info->offset_vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s index use not simple.\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- else if (GATHER_SCATTER_EMULATED_P (*gs_info))
- {
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
- || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
- || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
- || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
- (gs_info->offset_vectype),
- TYPE_VECTOR_SUBPARTS (vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported vector types for emulated "
- "gather.\n");
- return false;
- }
- }
- }
if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
|| *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
@@ -2771,7 +2773,7 @@ static gimple *
vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
tree vectype,
gimple_stmt_iterator *gsi,
- gather_scatter_info *gs_info,
+ const gather_scatter_info *gs_info,
tree ptr, tree offset, tree mask)
{
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
@@ -2869,7 +2871,7 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
static gimple *
vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
- gather_scatter_info *gs_info,
+ const gather_scatter_info *gs_info,
tree ptr, tree offset, tree oprnd, tree mask)
{
tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
@@ -2950,8 +2952,8 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
containing loop. */
static void
-vect_get_gather_scatter_ops (class loop *loop,
- slp_tree slp_node, gather_scatter_info *gs_info,
+vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
+ const gather_scatter_info *gs_info,
tree *dataref_ptr, vec<tree> *vec_offset)
{
gimple_seq stmts = NULL;
@@ -2979,7 +2981,7 @@ static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
loop_vec_info loop_vinfo,
gimple_stmt_iterator *gsi,
- gather_scatter_info *gs_info,
+ const gather_scatter_info *gs_info,
tree *dataref_bump, tree *vec_offset,
vec_loop_lens *loop_lens)
{
@@ -3158,7 +3160,7 @@ vectorizable_bswap (vec_info *vinfo,
return false;
}
- STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_bswap");
record_stmt_cost (cost_vec,
1, vector_stmt, stmt_info, 0, vect_prologue);
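Note: this is the first of many hunks in this patch replacing
STMT_VINFO_TYPE with SLP_TREE_TYPE.  The before/after shape, with an
assumed rationale, is:

/* Before: the chosen vectorization scheme lives on the scalar
   statement, shared by every SLP instance that references it.  */
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
/* After: it lives on the SLP node, so (presumably) distinct SLP
   instances of one scalar stmt can record distinct schemes.  */
SLP_TREE_TYPE (slp_node) = call_vec_info_type;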
@@ -3312,7 +3314,13 @@ vectorizable_call (vec_info *vinfo,
int mask_opno = -1;
if (internal_fn_p (cfn))
- mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ {
+ /* We can only handle direct internal masked calls here;
+ vectorizable_simd_clone_call handles the rest. */
+ if (cfn == CFN_MASK_CALL)
+ return false;
+ mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ }
for (i = 0; i < nargs; i++)
{
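Note: hypothetical GIMPLE contrasting the two kinds of masked calls the
new early-out separates; the statements are illustrative examples.

/* _1 = .MASK_LOAD (ptr_, align_, mask_);  <- direct internal function:
       internal_fn_mask_index gives its mask operand, handled here.
   _2 = .MASK_CALL (foo, a_, mask_);       <- wrapper for a masked call
       to a user function: only vectorizable_simd_clone_call can expand
       it via a SIMD clone, so vectorizable_call bails out first.  */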
@@ -3487,7 +3495,7 @@ vectorizable_call (vec_info *vinfo,
"incompatible vector types for invariants\n");
return false;
}
- STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_call");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
@@ -4282,7 +4290,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
/* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */
return true;
@@ -5427,13 +5435,13 @@ vectorizable_conversion (vec_info *vinfo,
DUMP_VECT_SCOPE ("vectorizable_conversion");
if (modifier == NONE)
{
- STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
slp_node, cost_vec);
}
else if (modifier == NARROW_SRC || modifier == NARROW_DST)
{
- STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
/* The final packing step produces one vector result per copy. */
unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
@@ -5442,7 +5450,7 @@ vectorizable_conversion (vec_info *vinfo,
}
else
{
- STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type;
/* The initial unpacking step produces two vector results
per copy. MULTI_STEP_CVT is 0 for a single conversion,
so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
@@ -5777,7 +5785,7 @@ vectorizable_assignment (vec_info *vinfo,
"incompatible vector types for invariants\n");
return false;
}
- STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = assignment_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_assignment");
if (!vect_nop_conversion_p (stmt_info))
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
@@ -6122,7 +6130,7 @@ vectorizable_shift (vec_info *vinfo,
gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
== INTEGER_CST));
}
- STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = shift_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_shift");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
return true;
@@ -6541,7 +6549,7 @@ vectorizable_operation (vec_info *vinfo,
return false;
}
- STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
if (using_emulated_vectors_p)
@@ -7090,13 +7098,14 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
tree ref_type;
gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
- if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
+ if (SLP_TREE_LANES (slp_node) > 1
|| mask
|| memory_access_type != VMAT_CONTIGUOUS
|| TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
|| loop_vinfo == NULL
|| LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || LOOP_VINFO_EPILOGUE_P (loop_vinfo)
|| STMT_VINFO_GROUPED_ACCESS (stmt_info)
|| !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
|| !integer_zerop (DR_INIT (dr_info->dr))
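Note: for orientation, the kind of access check_scan_store analyzes is an
OpenMP inscan reduction store, sketched below (illustrative, not from the
patch).  The new LOOP_VINFO_EPILOGUE_P condition refuses to vectorize such
scans in epilogue loops, alongside the existing fully-masked bail-out.

#pragma omp simd reduction (inscan, +:sum)
for (int i = 0; i < n; i++)
  {
    sum += a[i];
    #pragma omp scan inclusive (sum)
    b[i] = sum;
  }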
@@ -7945,7 +7954,6 @@ vectorizable_store (vec_info *vinfo,
bool costing_p = cost_vec;
if (costing_p) /* transformation not required. */
{
- STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
if (loop_vinfo
@@ -7974,9 +7982,9 @@ vectorizable_store (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"Vectorizing an unaligned access.\n");
- STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = store_vec_info_type;
}
- gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info));
+ gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
/* Transform. */
@@ -9572,7 +9580,7 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_LOAD_STORE_LANES)
vinfo->any_known_not_updated_vssa = true;
- STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = load_vec_info_type;
}
else
{
@@ -10211,29 +10219,6 @@ vectorizable_load (vec_info *vinfo,
tree bump;
tree vec_offset = NULL_TREE;
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- {
- aggr_type = NULL_TREE;
- bump = NULL_TREE;
- }
- else if (memory_access_type == VMAT_GATHER_SCATTER)
- {
- aggr_type = elem_type;
- if (!costing_p)
- vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
- gsi, &gs_info,
- &bump, &vec_offset, loop_lens);
- }
- else
- {
- if (memory_access_type == VMAT_LOAD_STORE_LANES)
- aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
- else
- aggr_type = vectype;
- if (!costing_p)
- bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
- memory_access_type, loop_lens);
- }
auto_vec<tree> vec_offsets;
auto_vec<tree> vec_masks;
@@ -10248,6 +10233,11 @@ vectorizable_load (vec_info *vinfo,
gcc_assert (alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
+ aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
+ if (!costing_p)
+ bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ memory_access_type, loop_lens);
+
unsigned int inside_cost = 0, prologue_cost = 0;
/* For costing some adjacent vector loads, we'd like to cost with
the total number of them once instead of cost each one by one. */
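Note: a worked instance of the array type built above, with assumed
concrete numbers (they are illustrative only).

/* A 4-element group of floats with V4SF vectors (nunits = 4) gives
   float[16]: one .LOAD_LANES transfer fills four vectors.  */
tree aggr = build_array_type_nelts (float_type_node, 4 * 4);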
@@ -10409,21 +10399,32 @@ vectorizable_load (vec_info *vinfo,
{
gcc_assert (!grouped_load && !slp_perm);
- unsigned int inside_cost = 0, prologue_cost = 0;
-
/* 1. Create the vector or array pointer update chain. */
- if (!costing_p)
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ aggr_type = NULL_TREE;
+ bump = NULL_TREE;
+ if (!costing_p)
vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr,
&vec_offsets);
- else
- dataref_ptr
- = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
- at_loop, offset, &dummy, gsi,
- &ptr_incr, false, bump);
+ }
+ else
+ {
+ aggr_type = elem_type;
+ if (!costing_p)
+ {
+ vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
+ gsi, &gs_info,
+ &bump, &vec_offset, loop_lens);
+ dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop, offset, &dummy, gsi,
+ &ptr_incr, false, bump);
+ }
}
+ unsigned int inside_cost = 0, prologue_cost = 0;
+
gimple *new_stmt = NULL;
for (i = 0; i < vec_num; i++)
{
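Note: the shape of the single-lane path after this restructuring,
summarized; the GIMPLE line is a hypothetical example of the gather form
whose operands vect_get_gather_scatter_ops prepares.

/* True gather/scatter: no aggregate type or pointer bump; per-copy
   offset vectors feed e.g.
     vect_x = .MASK_GATHER_LOAD (base_, vect_offs_, scale, else_, mask_);
   Strided fallback: a scalar-element pointer from
   vect_create_data_ref_ptr, advanced by the bump computed in
   vect_get_strided_load_store_ops.  */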
@@ -10744,6 +10745,11 @@ vectorizable_load (vec_info *vinfo,
return true;
}
+ aggr_type = vectype;
+ if (!costing_p)
+ bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ memory_access_type, loop_lens);
+
poly_uint64 group_elt = 0;
unsigned int inside_cost = 0, prologue_cost = 0;
/* For costing some adjacent vector loads, we'd like to cost with
@@ -11749,7 +11755,7 @@ vectorizable_condition (vec_info *vinfo,
}
}
- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = condition_vec_info_type;
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind);
return true;
}
@@ -12256,7 +12262,7 @@ vectorizable_comparison (vec_info *vinfo,
return false;
if (cost_vec)
- STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = comparison_vec_info_type;
return true;
}
@@ -12657,8 +12663,8 @@ vect_analyze_stmt (vec_info *vinfo,
/* Stmts that are (also) "live" (i.e. - that are used out of the loop)
need extra handling, except for vectorizable reductions. */
if (!bb_vinfo
- && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
- && (STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
+ && SLP_TREE_TYPE (node) != reduc_vec_info_type
+ && (SLP_TREE_TYPE (node) != lc_phi_info_type
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
&& (!node->ldst_lanes || SLP_TREE_CODE (node) == VEC_PERM_EXPR)
&& !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
@@ -12694,7 +12700,7 @@ vect_transform_stmt (vec_info *vinfo,
tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
- switch (STMT_VINFO_TYPE (stmt_info))
+ switch (SLP_TREE_TYPE (slp_node))
{
case type_demotion_vec_info_type:
case type_promotion_vec_info_type:
@@ -12811,7 +12817,7 @@ vect_transform_stmt (vec_info *vinfo,
done = true;
}
- if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type
+ if (SLP_TREE_TYPE (slp_node) != store_vec_info_type
&& (!slp_node->ldst_lanes
|| SLP_TREE_CODE (slp_node) == VEC_PERM_EXPR))
{