Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r-- | gcc/tree-vect-stmts.cc | 218
1 file changed, 112 insertions, 106 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 20a4bb2..3fa4585 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1422,7 +1422,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
 				      int group_size,
 				      vect_memory_access_type
 				      memory_access_type,
-				      gather_scatter_info *gs_info,
+				      const gather_scatter_info *gs_info,
 				      tree scalar_mask,
 				      vec<int> *elsvals = nullptr)
 {
@@ -1676,7 +1676,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
 				   get_object_alignment (DR_REF (dr)));
   gs_info->element_type = TREE_TYPE (vectype);
   gs_info->offset = fold_convert (offset_type, step);
-  gs_info->offset_dt = vect_constant_def;
   gs_info->scale = scale;
   gs_info->memory_type = memory_type;
   return true;
@@ -1703,19 +1702,32 @@ static bool
 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
 				    loop_vec_info loop_vinfo, bool masked_p,
 				    gather_scatter_info *gs_info,
-				    vec<int> *elsvals)
+				    vec<int> *elsvals,
+				    unsigned int group_size,
+				    bool single_element_p)
 {
   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
       || gs_info->ifn == IFN_LAST)
-    return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
-						masked_p, gs_info, elsvals);
+    {
+      if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
+						masked_p, gs_info, elsvals))
+	return false;
+    }
+  else
+    {
+      tree old_offset_type = TREE_TYPE (gs_info->offset);
+      tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
 
-  tree old_offset_type = TREE_TYPE (gs_info->offset);
-  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+      gcc_assert (TYPE_PRECISION (new_offset_type)
+		  >= TYPE_PRECISION (old_offset_type));
+      gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+    }
 
-  gcc_assert (TYPE_PRECISION (new_offset_type)
-	      >= TYPE_PRECISION (old_offset_type));
-  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+  if (!single_element_p
+      && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+						   gs_info->scale,
+						   group_size))
+    return false;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -1977,7 +1989,49 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 	 separated by the stride, until we have a complete vector.
 	 Fall back to scalar accesses if that isn't possible.  */
       *memory_access_type = VMAT_STRIDED_SLP;
-  else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    {
+      *memory_access_type = VMAT_GATHER_SCATTER;
+      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+				      elsvals))
+	gcc_unreachable ();
+      slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
+      tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
+      gs_info->offset_vectype = offset_vectype;
+      /* When using internal functions, we rely on pattern recognition
+	 to convert the type of the offset to the type that the target
+	 requires, with the result being a call to an internal function.
+	 If that failed for some reason (e.g. because another pattern
+	 took priority), just handle cases in which the offset already
+	 has the right type.  */
+      if (GATHER_SCATTER_IFN_P (*gs_info)
+	  && !is_gimple_call (stmt_info->stmt)
+	  && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+				     TREE_TYPE (offset_vectype)))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "%s offset requires a conversion\n",
+			     vls_type == VLS_LOAD ? "gather" : "scatter");
+	  return false;
+	}
+      else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+	{
+	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+	      || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
+	      || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
+	      || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
+				       TYPE_VECTOR_SUBPARTS (vectype)))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "unsupported vector types for emulated "
+				 "gather.\n");
+	      return false;
+	    }
+	}
+    }
+  else
     {
       int cmp = compare_step_with_zero (vinfo, stmt_info);
       if (cmp < 0)
@@ -2221,64 +2275,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
   if ((*memory_access_type == VMAT_ELEMENTWISE
        || *memory_access_type == VMAT_STRIDED_SLP)
       && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
-      && single_element_p
       && SLP_TREE_LANES (slp_node) == 1
       && loop_vinfo
       && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
-					     masked_p, gs_info, elsvals))
+					     masked_p, gs_info, elsvals,
+					     group_size, single_element_p))
     *memory_access_type = VMAT_GATHER_SCATTER;
-  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-    {
-      tree offset;
-      slp_tree offset_node;
-      *memory_access_type = VMAT_GATHER_SCATTER;
-      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
-				      elsvals))
-	gcc_unreachable ();
-      /* When using internal functions, we rely on pattern recognition
-	 to convert the type of the offset to the type that the target
-	 requires, with the result being a call to an internal function.
-	 If that failed for some reason (e.g. because another pattern
-	 took priority), just handle cases in which the offset already
-	 has the right type.  */
-      else if (GATHER_SCATTER_IFN_P (*gs_info)
-	       && !is_gimple_call (stmt_info->stmt)
-	       && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
-					  TREE_TYPE (gs_info->offset_vectype)))
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "%s offset requires a conversion\n",
-			     vls_type == VLS_LOAD ? "gather" : "scatter");
-	  return false;
-	}
-      else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node,
-				    &gs_info->offset_dt,
-				    &gs_info->offset_vectype))
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "%s index use not simple.\n",
-			     vls_type == VLS_LOAD ? "gather" : "scatter");
-	  return false;
-	}
-      else if (GATHER_SCATTER_EMULATED_P (*gs_info))
-	{
-	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
-	      || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
-	      || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
-	      || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
-					 (gs_info->offset_vectype),
-				       TYPE_VECTOR_SUBPARTS (vectype)))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "unsupported vector types for emulated "
-				 "gather.\n");
-	      return false;
-	    }
-	}
-    }
 
   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
@@ -2771,7 +2773,7 @@ static gimple *
 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
 				 tree vectype, gimple_stmt_iterator *gsi,
-				 gather_scatter_info *gs_info,
+				 const gather_scatter_info *gs_info,
 				 tree ptr, tree offset, tree mask)
 {
   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
@@ -2869,7 +2871,7 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
 static gimple *
 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
 				   gimple_stmt_iterator *gsi,
-				   gather_scatter_info *gs_info,
+				   const gather_scatter_info *gs_info,
 				   tree ptr, tree offset, tree oprnd, tree mask)
 {
   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
@@ -2950,8 +2952,8 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
    containing loop.  */
 
 static void
-vect_get_gather_scatter_ops (class loop *loop,
-			     slp_tree slp_node, gather_scatter_info *gs_info,
+vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
+			     const gather_scatter_info *gs_info,
 			     tree *dataref_ptr, vec<tree> *vec_offset)
 {
   gimple_seq stmts = NULL;
@@ -2979,7 +2981,7 @@ static void
 vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
 				 loop_vec_info loop_vinfo,
 				 gimple_stmt_iterator *gsi,
-				 gather_scatter_info *gs_info,
+				 const gather_scatter_info *gs_info,
 				 tree *dataref_bump, tree *vec_offset,
 				 vec_loop_lens *loop_lens)
 {
@@ -3312,7 +3314,13 @@ vectorizable_call (vec_info *vinfo,
 
   int mask_opno = -1;
   if (internal_fn_p (cfn))
-    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    {
+      /* We can only handle direct internal masked calls here,
+	 vectorizable_simd_clone_call is for the rest.  */
+      if (cfn == CFN_MASK_CALL)
+	return false;
+      mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    }
 
   for (i = 0; i < nargs; i++)
     {
@@ -7090,13 +7098,14 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
   tree ref_type;
 
   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
-  if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
+  if (SLP_TREE_LANES (slp_node) > 1
       || mask
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
      || loop_vinfo == NULL
      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      || LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
      || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
      || !integer_zerop (DR_INIT (dr_info->dr))
@@ -7945,7 +7954,6 @@ vectorizable_store (vec_info *vinfo,
   bool costing_p = cost_vec;
   if (costing_p) /* transformation not required.  */
     {
-      STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
       SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
 
       if (loop_vinfo
@@ -7976,7 +7984,7 @@ vectorizable_store (vec_info *vinfo,
       SLP_TREE_TYPE (slp_node) = store_vec_info_type;
     }
 
-  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info));
+  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
 
   /* Transform.  */
 
@@ -10211,29 +10219,6 @@ vectorizable_load (vec_info *vinfo,
   tree bump;
   tree vec_offset = NULL_TREE;
 
-  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-    {
-      aggr_type = NULL_TREE;
-      bump = NULL_TREE;
-    }
-  else if (memory_access_type == VMAT_GATHER_SCATTER)
-    {
-      aggr_type = elem_type;
-      if (!costing_p)
-	vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
-					 gsi, &gs_info,
-					 &bump, &vec_offset, loop_lens);
-    }
-  else
-    {
-      if (memory_access_type == VMAT_LOAD_STORE_LANES)
-	aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
-      else
-	aggr_type = vectype;
-      if (!costing_p)
-	bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
-					    memory_access_type, loop_lens);
-    }
 
   auto_vec<tree> vec_offsets;
   auto_vec<tree> vec_masks;
@@ -10248,6 +10233,11 @@ vectorizable_load (vec_info *vinfo,
       gcc_assert (alignment_support_scheme == dr_aligned
 		  || alignment_support_scheme == dr_unaligned_supported);
 
+      aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
+      if (!costing_p)
+	bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+					    memory_access_type, loop_lens);
+
       unsigned int inside_cost = 0, prologue_cost = 0;
       /* For costing some adjacent vector loads, we'd like to cost with
 	 the total number of them once instead of cost each one by one.  */
@@ -10409,21 +10399,32 @@ vectorizable_load (vec_info *vinfo,
     {
       gcc_assert (!grouped_load && !slp_perm);
 
-      unsigned int inside_cost = 0, prologue_cost = 0;
-
-      /* 1. Create the vector or array pointer update chain.  */
-      if (!costing_p)
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
 	{
-	  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	  aggr_type = NULL_TREE;
+	  bump = NULL_TREE;
+	  if (!costing_p)
 	    vect_get_gather_scatter_ops (loop, slp_node, &gs_info,
 					 &dataref_ptr, &vec_offsets);
-	  else
-	    dataref_ptr
-	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-					  at_loop, offset, &dummy, gsi,
-					  &ptr_incr, false, bump);
+	}
+      else
+	{
+	  aggr_type = elem_type;
+	  if (!costing_p)
+	    {
+	      vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
+					       gsi, &gs_info,
+					       &bump, &vec_offset, loop_lens);
+	      dataref_ptr
+		= vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+					    at_loop, offset, &dummy, gsi,
+					    &ptr_incr, false, bump);
+	    }
 	}
 
+      unsigned int inside_cost = 0, prologue_cost = 0;
+      gimple *new_stmt = NULL;
       for (i = 0; i < vec_num; i++)
 	{
@@ -10744,6 +10745,11 @@ vectorizable_load (vec_info *vinfo,
       return true;
     }
 
+  aggr_type = vectype;
+  if (!costing_p)
+    bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+					memory_access_type, loop_lens);
+
   poly_uint64 group_elt = 0;
   unsigned int inside_cost = 0, prologue_cost = 0;
   /* For costing some adjacent vector loads, we'd like to cost with
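
For context: the change to vect_use_strided_gather_scatters_p above now gates
multi-element groups on the prefer_gather_scatter target hook, so a backend
can decline the gather/scatter form when strided accesses are cheaper. Below
is a minimal sketch of such a hook implementation, assuming only the
three-argument shape (mode, scale, group size) visible at the call site in
this diff; the function name and the cost policy are hypothetical and not
taken from any real target:

    /* Hypothetical backend implementation of the prefer_gather_scatter
       hook consulted in vect_use_strided_gather_scatters_p.  The policy
       (decline gathers for small unit-scale groups) is illustrative only;
       real targets would base this on their own cost model.  */
    static bool
    example_prefer_gather_scatter (machine_mode /* mode */, int scale,
    				   unsigned int group_size)
    {
      /* A unit scale with a small group usually vectorizes better as
         contiguous or strided accesses, so decline the gather.  */
      if (scale == 1 && group_size <= 2)
        return false;
      /* Otherwise let the vectorizer emit a gather/scatter.  */
      return true;
    }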