Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r--	gcc/tree-vect-stmts.cc	246
1 file changed, 126 insertions, 120 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index aa2657a..88a12a1 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1422,7 +1422,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo,
 				      tree vectype, int group_size,
 				      vect_memory_access_type
 				      memory_access_type,
-				      gather_scatter_info *gs_info,
+				      const gather_scatter_info *gs_info,
 				      tree scalar_mask,
 				      vec<int> *elsvals = nullptr)
 {
@@ -1676,7 +1676,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
 				   get_object_alignment (DR_REF (dr)));
   gs_info->element_type = TREE_TYPE (vectype);
   gs_info->offset = fold_convert (offset_type, step);
-  gs_info->offset_dt = vect_constant_def;
   gs_info->scale = scale;
   gs_info->memory_type = memory_type;
   return true;
@@ -1703,19 +1702,32 @@ static bool
 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
 				    loop_vec_info loop_vinfo, bool masked_p,
 				    gather_scatter_info *gs_info,
-				    vec<int> *elsvals)
+				    vec<int> *elsvals,
+				    unsigned int group_size,
+				    bool single_element_p)
 {
   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
       || gs_info->ifn == IFN_LAST)
-    return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
-						masked_p, gs_info, elsvals);
+    {
+      if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
+						masked_p, gs_info, elsvals))
+	return false;
+    }
+  else
+    {
+      tree old_offset_type = TREE_TYPE (gs_info->offset);
+      tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
-  tree old_offset_type = TREE_TYPE (gs_info->offset);
-  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+      gcc_assert (TYPE_PRECISION (new_offset_type)
+		  >= TYPE_PRECISION (old_offset_type));
+      gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
-  gcc_assert (TYPE_PRECISION (new_offset_type)
-	      >= TYPE_PRECISION (old_offset_type));
-  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+    }
+
+  if (!single_element_p
+      && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+						   gs_info->scale,
+						   group_size))
+    return false;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -1977,7 +1989,49 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 	   separated by the stride, until we have a complete vector.
 	   Fall back to scalar accesses if that isn't possible.  */
 	*memory_access_type = VMAT_STRIDED_SLP;
-      else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	{
+	  *memory_access_type = VMAT_GATHER_SCATTER;
+	  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
+					  elsvals))
+	    gcc_unreachable ();
+	  slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
+	  tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
+	  gs_info->offset_vectype = offset_vectype;
+	  /* When using internal functions, we rely on pattern recognition
+	     to convert the type of the offset to the type that the target
+	     requires, with the result being a call to an internal function.
+	     If that failed for some reason (e.g. because another pattern
+	     took priority), just handle cases in which the offset already
+	     has the right type.  */
+	  if (GATHER_SCATTER_IFN_P (*gs_info)
+	      && !is_gimple_call (stmt_info->stmt)
+	      && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
+					 offset_vectype))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "%s offset requires a conversion\n",
+				 vls_type == VLS_LOAD ? "gather" : "scatter");
+	      return false;
+	    }
+	  else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+	    {
+	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+		  || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
+		  || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
+		  || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
+					   TYPE_VECTOR_SUBPARTS (vectype)))
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "unsupported vector types for emulated "
+				     "gather.\n");
+		  return false;
+		}
+	    }
+	}
+      else
 	{
 	  int cmp = compare_step_with_zero (vinfo, stmt_info);
 	  if (cmp < 0)
@@ -2221,64 +2275,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
   if ((*memory_access_type == VMAT_ELEMENTWISE
        || *memory_access_type == VMAT_STRIDED_SLP)
       && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
-      && single_element_p
       && SLP_TREE_LANES (slp_node) == 1
       && loop_vinfo
      && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
-					     masked_p, gs_info, elsvals))
+					     masked_p, gs_info, elsvals,
+					     group_size, single_element_p))
     *memory_access_type = VMAT_GATHER_SCATTER;
-  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-    {
-      tree offset;
-      slp_tree offset_node;
-      *memory_access_type = VMAT_GATHER_SCATTER;
-      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
-				      elsvals))
-	gcc_unreachable ();
-      /* When using internal functions, we rely on pattern recognition
-	 to convert the type of the offset to the type that the target
-	 requires, with the result being a call to an internal function.
-	 If that failed for some reason (e.g. because another pattern
-	 took priority), just handle cases in which the offset already
-	 has the right type.  */
-      else if (GATHER_SCATTER_IFN_P (*gs_info)
-	       && !is_gimple_call (stmt_info->stmt)
-	       && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
-					  TREE_TYPE (gs_info->offset_vectype)))
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "%s offset requires a conversion\n",
-			     vls_type == VLS_LOAD ? "gather" : "scatter");
-	  return false;
-	}
-      else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node,
-				    &gs_info->offset_dt,
-				    &gs_info->offset_vectype))
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "%s index use not simple.\n",
-			     vls_type == VLS_LOAD ? "gather" : "scatter");
-	  return false;
-	}
-      else if (GATHER_SCATTER_EMULATED_P (*gs_info))
-	{
-	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
-	      || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
-	      || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
-	      || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
-					 (gs_info->offset_vectype),
-				       TYPE_VECTOR_SUBPARTS (vectype)))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "unsupported vector types for emulated "
-				 "gather.\n");
-	      return false;
-	    }
-	}
-    }
 
   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
@@ -2771,7 +2773,7 @@ static gimple *
 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
 				 tree vectype, gimple_stmt_iterator *gsi,
-				 gather_scatter_info *gs_info,
+				 const gather_scatter_info *gs_info,
 				 tree ptr, tree offset, tree mask)
 {
   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
@@ -2869,7 +2871,7 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
 static gimple *
 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
 				   gimple_stmt_iterator *gsi,
-				   gather_scatter_info *gs_info,
+				   const gather_scatter_info *gs_info,
 				   tree ptr, tree offset, tree oprnd, tree mask)
 {
   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
@@ -2950,8 +2952,8 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
    containing loop.  */
 
 static void
-vect_get_gather_scatter_ops (class loop *loop,
-			     slp_tree slp_node, gather_scatter_info *gs_info,
+vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
+			     const gather_scatter_info *gs_info,
 			     tree *dataref_ptr, vec<tree> *vec_offset)
 {
   gimple_seq stmts = NULL;
@@ -2979,7 +2981,7 @@ static void
 vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
 				 loop_vec_info loop_vinfo,
 				 gimple_stmt_iterator *gsi,
-				 gather_scatter_info *gs_info,
+				 const gather_scatter_info *gs_info,
 				 tree *dataref_bump, tree *vec_offset,
 				 vec_loop_lens *loop_lens)
 {
@@ -3158,7 +3160,7 @@ vectorizable_bswap (vec_info *vinfo,
       return false;
     }
 
-  STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+  SLP_TREE_TYPE (slp_node) = call_vec_info_type;
   DUMP_VECT_SCOPE ("vectorizable_bswap");
   record_stmt_cost (cost_vec, 1, vector_stmt, stmt_info, 0, vect_prologue);
@@ -3312,7 +3314,13 @@ vectorizable_call (vec_info *vinfo,
 
   int mask_opno = -1;
   if (internal_fn_p (cfn))
-    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    {
+      /* We can only handle direct internal masked calls here,
+	 vectorizable_simd_clone_call is for the rest.  */
+      if (cfn == CFN_MASK_CALL)
+	return false;
+      mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    }
 
   for (i = 0; i < nargs; i++)
     {
@@ -3487,7 +3495,7 @@ vectorizable_call (vec_info *vinfo,
 			 "incompatible vector types for invariants\n");
 	  return false;
 	}
-      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = call_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_call");
       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
@@ -4282,7 +4290,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
 	    LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	}
 
-      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
       /* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */
       return true;
@@ -5427,13 +5435,13 @@ vectorizable_conversion (vec_info *vinfo,
       DUMP_VECT_SCOPE ("vectorizable_conversion");
       if (modifier == NONE)
 	{
-	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+	  SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
 	  vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
 				  slp_node, cost_vec);
 	}
       else if (modifier == NARROW_SRC || modifier == NARROW_DST)
 	{
-	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
+	  SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
 	  /* The final packing step produces one vector result per copy.  */
 	  unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
 	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
@@ -5442,7 +5450,7 @@ vectorizable_conversion (vec_info *vinfo,
 	}
       else
 	{
-	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
+	  SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type;
 	  /* The initial unpacking step produces two vector results
 	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
 	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
@@ -5777,7 +5785,7 @@ vectorizable_assignment (vec_info *vinfo,
 			 "incompatible vector types for invariants\n");
 	  return false;
 	}
-      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = assignment_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_assignment");
       if (!vect_nop_conversion_p (stmt_info))
 	vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
@@ -6122,7 +6130,7 @@ vectorizable_shift (vec_info *vinfo,
 	    gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
 			 == INTEGER_CST));
 	}
-      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = shift_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_shift");
       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
       return true;
@@ -6541,7 +6549,7 @@ vectorizable_operation (vec_info *vinfo,
 	  return false;
 	}
 
-      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = op_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_operation");
       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
       if (using_emulated_vectors_p)
@@ -7974,7 +7982,7 @@ vectorizable_store (vec_info *vinfo,
 	dump_printf_loc (MSG_NOTE, vect_location,
			 "Vectorizing an unaligned access.\n");
 
-      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = store_vec_info_type;
     }
 
   gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info));
@@ -9572,7 +9580,7 @@ vectorizable_load (vec_info *vinfo,
       if (memory_access_type == VMAT_LOAD_STORE_LANES)
 	vinfo->any_known_not_updated_vssa = true;
 
-      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = load_vec_info_type;
     }
   else
     {
@@ -10211,29 +10219,6 @@ vectorizable_load (vec_info *vinfo,
 
   tree bump;
   tree vec_offset = NULL_TREE;
-  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-    {
-      aggr_type = NULL_TREE;
-      bump = NULL_TREE;
-    }
-  else if (memory_access_type == VMAT_GATHER_SCATTER)
-    {
-      aggr_type = elem_type;
-      if (!costing_p)
-	vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
-					 gsi, &gs_info,
-					 &bump, &vec_offset, loop_lens);
-    }
-  else
-    {
-      if (memory_access_type == VMAT_LOAD_STORE_LANES)
-	aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
-      else
-	aggr_type = vectype;
-      if (!costing_p)
-	bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
-					    memory_access_type, loop_lens);
-    }
 
   auto_vec<tree> vec_offsets;
   auto_vec<tree> vec_masks;
@@ -10248,6 +10233,11 @@ vectorizable_load (vec_info *vinfo,
       gcc_assert (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported);
 
+      aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
+      if (!costing_p)
+	bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+					    memory_access_type, loop_lens);
+
       unsigned int inside_cost = 0, prologue_cost = 0;
       /* For costing some adjacent vector loads, we'd like to cost with
	 the total number of them once instead of cost each one by one. */
@@ -10409,21 +10399,32 @@ vectorizable_load (vec_info *vinfo,
     {
       gcc_assert (!grouped_load && !slp_perm);
 
-      unsigned int inside_cost = 0, prologue_cost = 0;
-
-      /* 1. Create the vector or array pointer update chain.  */
-      if (!costing_p)
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
 	{
-	  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+	  aggr_type = NULL_TREE;
+	  bump = NULL_TREE;
+	  if (!costing_p)
 	    vect_get_gather_scatter_ops (loop, slp_node, &gs_info,
					 &dataref_ptr, &vec_offsets);
-	  else
-	    dataref_ptr
-	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-					  at_loop, offset, &dummy, gsi,
-					  &ptr_incr, false, bump);
+	}
+      else
+	{
+	  aggr_type = elem_type;
+	  if (!costing_p)
+	    {
+	      vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
+					       gsi, &gs_info,
+					       &bump, &vec_offset, loop_lens);
+	      dataref_ptr
+		= vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+					    at_loop, offset, &dummy, gsi,
+					    &ptr_incr, false, bump);
+	    }
 	}
 
+      unsigned int inside_cost = 0, prologue_cost = 0;
+
+      gimple *new_stmt = NULL;
       for (i = 0; i < vec_num; i++)
	{
@@ -10744,6 +10745,11 @@ vectorizable_load (vec_info *vinfo,
       return true;
     }
 
+  aggr_type = vectype;
+  if (!costing_p)
+    bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+					memory_access_type, loop_lens);
+
   poly_uint64 group_elt = 0;
   unsigned int inside_cost = 0, prologue_cost = 0;
   /* For costing some adjacent vector loads, we'd like to cost with
@@ -11749,7 +11755,7 @@ vectorizable_condition (vec_info *vinfo,
	    }
	}
 
-      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+      SLP_TREE_TYPE (slp_node) = condition_vec_info_type;
       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind);
       return true;
     }
@@ -12256,7 +12262,7 @@ vectorizable_comparison (vec_info *vinfo,
     return false;
 
   if (cost_vec)
-    STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+    SLP_TREE_TYPE (slp_node) = comparison_vec_info_type;
 
   return true;
 }
@@ -12657,8 +12663,8 @@ vect_analyze_stmt (vec_info *vinfo,
   /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
      need extra handling, except for vectorizable reductions.  */
   if (!bb_vinfo
-      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
-      && (STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
+      && SLP_TREE_TYPE (node) != reduc_vec_info_type
+      && (SLP_TREE_TYPE (node) != lc_phi_info_type
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
       && (!node->ldst_lanes || SLP_TREE_CODE (node) == VEC_PERM_EXPR)
       && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
@@ -12694,7 +12700,7 @@ vect_transform_stmt (vec_info *vinfo,
   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
   STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
 
-  switch (STMT_VINFO_TYPE (stmt_info))
+  switch (SLP_TREE_TYPE (slp_node))
     {
     case type_demotion_vec_info_type:
     case type_promotion_vec_info_type:
@@ -12811,7 +12817,7 @@ vect_transform_stmt (vec_info *vinfo,
       done = true;
     }
 
-  if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type
+  if (SLP_TREE_TYPE (slp_node) != store_vec_info_type
      && (!slp_node->ldst_lanes
	  || SLP_TREE_CODE (slp_node) == VEC_PERM_EXPR))
    {