author     Richard Biener <rguenther@suse.de>       2025-07-28 13:48:39 +0200
committer  Richard Biener <rguenth@gcc.gnu.org>     2025-08-20 08:53:23 +0200
commit     f30aa394e4b14e6d55e349344fe8e0b23e0a7d06 (patch)
tree       ebeedff4b1305bde1801a3cf412fd80b7a387522
parent     e78eb2f85b9b61cf193dfc721bbd58966bea76df (diff)
Record get_load_store_info results from analysis
The following is a patch to make us record the get_load_store_info
results from load/store analysis and re-use them during transform.
In particular this moves where SLP_TREE_MEMORY_ACCESS_TYPE is stored.
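
To picture the overall shape of the change, here is a minimal
standalone sketch of the analyze-once, record-on-the-node,
reuse-at-transform pattern (this is not the GCC code itself;
slp_node_t, load_store_data, analyze and transform are simplified,
hypothetical stand-ins):

  #include <cassert>
  #include <memory>

  enum vect_memory_access_type { VMAT_UNINITIALIZED, VMAT_CONTIGUOUS };

  /* Stand-in for vect_load_store_data.  */
  struct load_store_data
  {
    vect_memory_access_type memory_access_type = VMAT_UNINITIALIZED;
    int misalignment = 0;
  };

  /* Stand-in for _slp_tree; owns the recorded analysis results.  */
  struct slp_node_t
  {
    std::unique_ptr<load_store_data> data;
  };

  static bool
  analyze (slp_node_t &node)
  {
    auto ls = std::make_unique<load_store_data> ();
    /* Pretend analysis classified the access.  */
    ls->memory_access_type = VMAT_CONTIGUOUS;
    /* Record the result on the node for the transform phase.  */
    node.data = std::move (ls);
    return true;
  }

  static void
  transform (const slp_node_t &node)
  {
    /* Re-use the recorded classification instead of re-analyzing.  */
    const load_store_data &ls = *node.data;
    assert (ls.memory_access_type != VMAT_UNINITIALIZED);
  }

  int
  main ()
  {
    slp_node_t node;
    if (analyze (node))
      transform (node);
  }
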
A major hassle was (and still is, to some extent) gather/scatter
handling with its accompanying gather_scatter_info.  Since
get_load_store_info no longer fully re-analyzes gathers/scatters,
but part of the information is instead recorded in the SLP tree
during SLP build, the following eliminates the use of this data in
vectorizable_load/store, recording the remaining relevant part in
the load-store info (namely the IFN or decl chosen).
Strided load handling keeps the re-analysis but populates the data
back into the SLP tree and the load-store info; that is something
for further improvement.  This also shows that classifying an SLP
tree as load/store early and allocating the load-store data at that
point might be a way to move all of the gather/scatter auxiliary
data back into one place.
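
The "IFN or decl chosen" record can be pictured as a small tagged
union discriminated by the VMAT, along the lines of this standalone
sketch (the VMAT and IFN names echo the real ones, everything else
is a simplified assumption):

  #include <cstdio>

  enum vect_memory_access_type
  {
    VMAT_GATHER_SCATTER_IFN,
    VMAT_GATHER_SCATTER_LEGACY
  };
  enum internal_fn { IFN_LAST, IFN_MASK_GATHER_LOAD };
  struct decl_t { const char *name; };

  struct gather_scatter_record
  {
    vect_memory_access_type kind;   /* Discriminates the union below.  */
    union
    {
      internal_fn ifn;              /* VMAT_GATHER_SCATTER_IFN.  */
      const decl_t *decl;           /* VMAT_GATHER_SCATTER_LEGACY.  */
    } gs;
  };

  static void
  emit (const gather_scatter_record &r)
  {
    if (r.kind == VMAT_GATHER_SCATTER_IFN)
      std::printf ("emit internal function %d\n", (int) r.gs.ifn);
    else
      std::printf ("emit call to builtin %s\n", r.gs.decl->name);
  }

  int
  main ()
  {
    gather_scatter_record r;
    r.kind = VMAT_GATHER_SCATTER_IFN;
    r.gs.ifn = IFN_MASK_GATHER_LOAD;
    emit (r);
  }
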
Rather than mass-replacing variable references I've kept the locals
but made them read-only, only adjusting a few elsval setters and
adding a FIXME to the strided SLP handling of alignment (allowing a
local override there).
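
Outside of GCC the read-only-locals idiom looks roughly like this
(a standalone sketch; analysis_data, f and the constants are made up):

  struct analysis_data { int misalignment; };

  static int
  use (int m)
  {
    return m;
  }

  static int
  f (const analysis_data &ls, bool strided)
  {
    /* Read-only alias: later code keeps the familiar local name but
       can no longer modify the recorded analysis result by accident.  */
    const int misalignment = ls.misalignment;

    if (strided)
      {
        /* FIXME-style local override: shadow the const local with a
           mutable copy in the one block that still re-computes it.  */
        const int tem = misalignment;
        int misalignment = tem;
        misalignment = 0;   /* Stands in for the strided re-analysis.  */
        return use (misalignment);
      }
    return use (misalignment);
  }

  int
  main ()
  {
    analysis_data ls = { 3 };
    return f (ls, true) == 0 && f (ls, false) == 3 ? 0 : 1;
  }
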
The FIXME shows that while a lot of analysis is done in
get_load_store_type, that is far from all of it.  There is also a
possibility that splitting up the transform phase into separate
load/store def types, based on the VMAT chosen, would make the code
more maintainable.
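
A hypothetical sketch of that possible refactoring, dispatching the
transform once on the VMAT recorded at analysis time instead of
branching on it throughout one large function:

  enum vect_memory_access_type
  {
    VMAT_CONTIGUOUS,
    VMAT_STRIDED_SLP,
    VMAT_GATHER_SCATTER_IFN
  };

  /* One transform helper per access kind; bodies elided.  */
  static void transform_contiguous () {}
  static void transform_strided_slp () {}
  static void transform_gather_scatter () {}

  static void
  transform_load (vect_memory_access_type vmat)
  {
    switch (vmat)
      {
      case VMAT_CONTIGUOUS:
        transform_contiguous ();
        break;
      case VMAT_STRIDED_SLP:
        transform_strided_slp ();
        break;
      case VMAT_GATHER_SCATTER_IFN:
        transform_gather_scatter ();
        break;
      }
  }

  int
  main ()
  {
    transform_load (VMAT_CONTIGUOUS);
  }
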
* tree-vectorizer.h (vect_load_store_data): New.
(_slp_tree::memory_access_type): Remove.
(SLP_TREE_MEMORY_ACCESS_TYPE): Turn into inline function.
* tree-vect-slp.cc (_slp_tree::_slp_tree): Do not
initialize SLP_TREE_MEMORY_ACCESS_TYPE.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
Remove gather_scatter_info pointer argument, instead get
info from the SLP node.
(vect_build_one_gather_load_call): Get SLP node and builtin
decl as argument and remove uses of gather_scatter_info.
(vect_build_one_scatter_store_call): Likewise.
(vect_get_gather_scatter_ops): Remove uses of gather_scatter_info.
(vect_get_strided_load_store_ops): Get SLP node and remove
uses of gather_scatter_info.
(get_load_store_type): Take pointer to vect_load_store_data
instead of individual pointers.
(vectorizable_store): Adjust. Re-use get_load_store_type
result from analysis time.
(vectorizable_load): Likewise.
-rw-r--r--  gcc/tree-vect-slp.cc    |   1
-rw-r--r--  gcc/tree-vect-stmts.cc  | 285
-rw-r--r--  gcc/tree-vectorizer.h   |  34
3 files changed, 182 insertions, 138 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d5adb8b..fd793a3 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -126,7 +126,6 @@ _slp_tree::_slp_tree ()
   this->avoid_stlf_fail = false;
   SLP_TREE_VECTYPE (this) = NULL_TREE;
   SLP_TREE_REPRESENTATIVE (this) = NULL;
-  SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_UNINITIALIZED;
   SLP_TREE_REF_COUNT (this) = 1;
   this->failed = NULL;
   this->max_nunits = 1;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33f1dfd..73c15e6 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1428,12 +1428,12 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                       slp_tree slp_node,
                                       vec_load_store_type vls_type,
                                       int group_size,
-                                      vect_memory_access_type
-                                      memory_access_type,
-                                      const gather_scatter_info *gs_info,
+                                      vect_load_store_data *ls,
                                       slp_tree mask_node,
                                       vec<int> *elsvals = nullptr)
 {
+  vect_memory_access_type memory_access_type = ls->memory_access_type;
+
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
     return;
@@ -1492,16 +1492,20 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
       internal_fn len_ifn = (is_load
                              ? IFN_MASK_LEN_GATHER_LOAD
                              : IFN_MASK_LEN_SCATTER_STORE);
+      stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
+      tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
+                          ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
+                          : ls->strided_offset_vectype);
+      tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
+      int scale = SLP_TREE_GS_SCALE (slp_node);
       if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
-                                                  gs_info->memory_type,
-                                                  gs_info->offset_vectype,
-                                                  gs_info->scale,
+                                                  memory_type,
+                                                  off_vectype, scale,
                                                   elsvals))
         vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
       else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
-                                                       gs_info->memory_type,
-                                                       gs_info->offset_vectype,
-                                                       gs_info->scale,
+                                                       memory_type,
+                                                       off_vectype, scale,
                                                        elsvals)
                || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
         vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
@@ -1960,14 +1964,15 @@ static bool
 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                      tree vectype, slp_tree slp_node,
                      bool masked_p, vec_load_store_type vls_type,
-                     vect_memory_access_type *memory_access_type,
-                     poly_int64 *poffset,
-                     dr_alignment_support *alignment_support_scheme,
-                     int *misalignment,
-                     gather_scatter_info *gs_info,
-                     internal_fn *lanes_ifn,
-                     vec<int> *elsvals = nullptr)
+                     vect_load_store_data *ls)
 {
+  vect_memory_access_type *memory_access_type = &ls->memory_access_type;
+  poly_int64 *poffset = &ls->poffset;
+  dr_alignment_support *alignment_support_scheme
+    = &ls->alignment_support_scheme;
+  int *misalignment = &ls->misalignment;
+  internal_fn *lanes_ifn = &ls->lanes_ifn;
+  vec<int> *elsvals = &ls->elsvals;
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2025,34 +2030,29 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     {
       slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
       tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
-      memset (gs_info, 0, sizeof (gather_scatter_info));
-      gs_info->offset_vectype = offset_vectype;
-      gs_info->scale = SLP_TREE_GS_SCALE (slp_node);
-      gs_info->base = SLP_TREE_GS_BASE (slp_node);
-      gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
-      gs_info->decl = NULL_TREE;
-      gs_info->ifn = IFN_LAST;
+      int scale = SLP_TREE_GS_SCALE (slp_node);
+      tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
       tree tem;
       if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
                                     masked_p, vectype,
-                                    gs_info->memory_type,
-                                    offset_vectype, gs_info->scale,
-                                    &gs_info->ifn, &tem,
+                                    memory_type,
+                                    offset_vectype, scale,
+                                    &ls->gs.ifn, &tem,
                                     elsvals))
         *memory_access_type = VMAT_GATHER_SCATTER_IFN;
       else if (vls_type == VLS_LOAD
                ? (targetm.vectorize.builtin_gather
-                  && (gs_info->decl
+                  && (ls->gs.decl
                       = targetm.vectorize.builtin_gather (vectype,
                                                           TREE_TYPE (offset_vectype),
-                                                          gs_info->scale)))
+                                                          scale)))
               : (targetm.vectorize.builtin_scatter
-                  && (gs_info->decl
+                  && (ls->gs.decl
                       = targetm.vectorize.builtin_scatter (vectype,
                                                            TREE_TYPE (offset_vectype),
-                                                           gs_info->scale))))
+                                                           scale))))
         *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
       else
         {
@@ -2317,11 +2317,20 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
           || *memory_access_type == VMAT_STRIDED_SLP)
       && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
       && SLP_TREE_LANES (slp_node) == 1
-      && loop_vinfo
-      && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
-                                             masked_p, gs_info, elsvals,
-                                             group_size, single_element_p))
-    *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+      && loop_vinfo)
+    {
+      gather_scatter_info gs_info;
+      if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+                                              masked_p, &gs_info, elsvals,
+                                              group_size, single_element_p))
+        {
+          SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
+          SLP_TREE_GS_BASE (slp_node) = error_mark_node;
+          ls->gs.ifn = gs_info.ifn;
+          ls->strided_offset_vectype = gs_info.offset_vectype;
+          *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+        }
+    }
 
   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
@@ -2715,13 +2724,12 @@ vect_get_mask_load_else (int elsval, tree type)
 
 static gimple *
 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
-                                 tree vectype,
-                                 gimple_stmt_iterator *gsi,
-                                 const gather_scatter_info *gs_info,
+                                 slp_tree slp_node, tree vectype,
+                                 gimple_stmt_iterator *gsi, tree decl,
                                  tree ptr, tree offset, tree mask)
 {
-  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
-  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
+  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
+  tree rettype = TREE_TYPE (TREE_TYPE (decl));
   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   /* ptrtype */ arglist = TREE_CHAIN (arglist);
   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
@@ -2787,8 +2795,8 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
       mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
     }
 
-  tree scale = build_int_cst (scaletype, gs_info->scale);
-  gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
+  tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
+  gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
                                         mask_op, scale);
 
   if (!useless_type_conversion_p (vectype, rettype))
@@ -2814,12 +2822,13 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
 
 static gimple *
 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
+                                   slp_tree slp_node,
                                    gimple_stmt_iterator *gsi,
-                                   const gather_scatter_info *gs_info,
+                                   tree decl,
                                    tree ptr, tree offset, tree oprnd, tree mask)
 {
-  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
-  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
+  tree rettype = TREE_TYPE (TREE_TYPE (decl));
+  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
   /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
@@ -2883,9 +2892,9 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
       op = var;
     }
 
-  tree scale = build_int_cst (scaletype, gs_info->scale);
+  tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
   gcall *new_stmt
-    = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
+    = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
   return new_stmt;
 }
@@ -2897,11 +2906,11 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
 
 static void
 vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
-                             const gather_scatter_info *gs_info,
                              tree *dataref_ptr, vec<tree> *vec_offset)
 {
   gimple_seq stmts = NULL;
-  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
+  *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
+                                       &stmts, true, NULL_TREE);
   if (stmts != NULL)
     {
       basic_block new_bb;
@@ -2922,10 +2931,10 @@ vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
    I * DR_STEP / SCALE.  */
 
 static void
-vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
+vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
+                                 tree vectype, tree offset_vectype,
                                  loop_vec_info loop_vinfo,
                                  gimple_stmt_iterator *gsi,
-                                 const gather_scatter_info *gs_info,
                                  tree *dataref_bump, tree *vec_offset,
                                  vec_loop_lens *loop_lens)
 {
@@ -2966,15 +2975,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
       /* The offset given in GS_INFO can have pointer type, so use the
          element type of the vector instead.  */
-      tree offset_type = TREE_TYPE (gs_info->offset_vectype);
+      tree offset_type = TREE_TYPE (offset_vectype);
 
       /* Calculate X = DR_STEP / SCALE and convert it to the appropriate
         type.  */
       tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
-                              ssize_int (gs_info->scale));
+                              ssize_int (SLP_TREE_GS_SCALE (node)));
       step = fold_convert (offset_type, step);
 
       /* Create {0, X, X*2, X*3, ...}.  */
-      tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
+      tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
                                  build_zero_cst (offset_type), step);
       *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
     }
@@ -7729,7 +7738,6 @@ vectorizable_store (vec_info *vinfo,
   unsigned int vec_num;
   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   tree aggr_type;
-  gather_scatter_info gs_info;
   poly_uint64 vf;
   vec_load_store_type vls_type;
   tree ref_type;
@@ -7814,16 +7822,20 @@ vectorizable_store (vec_info *vinfo,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  vect_memory_access_type memory_access_type;
-  enum dr_alignment_support alignment_support_scheme;
-  int misalignment;
-  poly_int64 poffset;
-  internal_fn lanes_ifn;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
-                            vls_type, &memory_access_type, &poffset,
-                            &alignment_support_scheme, &misalignment, &gs_info,
-                            &lanes_ifn))
+  vect_load_store_data _ls_data;
+  vect_load_store_data &ls = slp_node->get_data (_ls_data);
+  if (cost_vec
+      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+                               vls_type, &_ls_data))
     return false;
+  /* Temporary aliases to analysis data, should not be modified through
+     these.  */
+  const vect_memory_access_type memory_access_type = ls.memory_access_type;
+  const dr_alignment_support alignment_support_scheme
+    = ls.alignment_support_scheme;
+  const int misalignment = ls.misalignment;
+  const poly_int64 poffset = ls.poffset;
+  const internal_fn lanes_ifn = ls.lanes_ifn;
 
   if (slp_node->ldst_lanes
       && memory_access_type != VMAT_LOAD_STORE_LANES)
@@ -7895,13 +7907,10 @@ vectorizable_store (vec_info *vinfo,
   bool costing_p = cost_vec;
   if (costing_p) /* transformation not required.  */
     {
-      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
-
       if (loop_vinfo
          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
-                                             vls_type, group_size,
-                                             memory_access_type, &gs_info,
+                                             vls_type, group_size, &ls,
                                              mask_node);
 
       if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
@@ -7924,8 +7933,8 @@ vectorizable_store (vec_info *vinfo,
                         "Vectorizing an unaligned access.\n");
 
       SLP_TREE_TYPE (slp_node) = store_vec_info_type;
+      slp_node->data = new vect_load_store_data (std::move (ls));
     }
-  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
 
   /* Transform.  */
@@ -8020,6 +8029,14 @@ vectorizable_store (vec_info *vinfo,
         ...
        */
 
+      /* ??? Modify local copies of alignment_support_scheme and
+         misalignment, but this part of analysis should be done
+         earlier and remembered, likewise the chosen load mode.  */
+      const dr_alignment_support tem = alignment_support_scheme;
+      dr_alignment_support alignment_support_scheme = tem;
+      const int tem2 = misalignment;
+      int misalignment = tem2;
+
       unsigned nstores = const_nunits;
       unsigned lnel = 1;
       tree ltype = elem_type;
@@ -8289,8 +8306,9 @@ vectorizable_store (vec_info *vinfo,
     {
       aggr_type = elem_type;
       if (!costing_p)
-       vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
-                                        gsi, &gs_info,
+       vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
+                                        ls.strided_offset_vectype,
+                                        loop_vinfo, gsi,
                                         &bump, &vec_offset, loop_lens);
     }
   else
@@ -8497,7 +8515,7 @@ vectorizable_store (vec_info *vinfo,
        vect_get_slp_defs (mask_node, &vec_masks);
 
       if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-       vect_get_gather_scatter_ops (loop, slp_node, &gs_info,
+       vect_get_gather_scatter_ops (loop, slp_node,
                                     &dataref_ptr, &vec_offsets);
       else
        dataref_ptr
@@ -8555,9 +8573,9 @@ vectorizable_store (vec_info *vinfo,
              if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
                vec_offset = vec_offsets[j];
-             tree scale = size_int (gs_info.scale);
+             tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
 
-             if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
+             if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi,
@@ -8627,13 +8645,14 @@ vectorizable_store (vec_info *vinfo,
                  continue;
                }
 
+             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
              poly_uint64 offset_nunits
-               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+               = TYPE_VECTOR_SUBPARTS (offset_vectype);
              if (known_eq (nunits, offset_nunits))
                {
                  new_stmt = vect_build_one_scatter_store_call
-                              (vinfo, stmt_info, gsi, &gs_info,
-                               dataref_ptr, vec_offsets[j],
+                              (vinfo, stmt_info, slp_node, gsi,
+                               ls.gs.decl, dataref_ptr, vec_offsets[j],
                                vec_oprnd, final_mask);
                  vect_finish_stmt_generation (vinfo, stmt_info,
                                               new_stmt, gsi);
@@ -8644,7 +8663,7 @@ vectorizable_store (vec_info *vinfo,
                     lanes but the builtins will store full vectype data
                     from the lower lanes.  */
                  new_stmt = vect_build_one_scatter_store_call
-                              (vinfo, stmt_info, gsi, &gs_info,
+                              (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
                                dataref_ptr, vec_offsets[2 * j],
                                vec_oprnd, final_mask);
                  vect_finish_stmt_generation (vinfo, stmt_info,
@@ -8670,14 +8689,14 @@ vectorizable_store (vec_info *vinfo,
                                               VEC_UNPACK_HI_EXPR,
                                               final_mask);
                      final_mask = make_ssa_name
-                                    (truth_type_for (gs_info.offset_vectype));
+                                    (truth_type_for (offset_vectype));
                      gimple_set_lhs (new_stmt, final_mask);
                      vect_finish_stmt_generation (vinfo, stmt_info,
                                                   new_stmt, gsi);
                    }
 
                  new_stmt = vect_build_one_scatter_store_call
-                              (vinfo, stmt_info, gsi, &gs_info,
+                              (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
                                dataref_ptr, vec_offsets[2 * j + 1],
                                vec_oprnd, final_mask);
                  vect_finish_stmt_generation (vinfo, stmt_info,
@@ -8710,8 +8729,8 @@ vectorizable_store (vec_info *vinfo,
                    }
                  new_stmt = vect_build_one_scatter_store_call
-                              (vinfo, stmt_info, gsi, &gs_info,
-                               dataref_ptr, vec_offset,
+                              (vinfo, stmt_info, slp_node, gsi,
+                               ls.gs.decl, dataref_ptr, vec_offset,
                                vec_oprnd, final_mask);
                  vect_finish_stmt_generation (vinfo, stmt_info,
                                               new_stmt, gsi);
@@ -8742,9 +8761,10 @@ vectorizable_store (vec_info *vinfo,
                  continue;
                }
 
+             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
              unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
              unsigned HOST_WIDE_INT const_offset_nunits
-               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
+               = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
              vec<constructor_elt, va_gc> *ctor_elts;
              vec_alloc (ctor_elts, const_nunits);
              gimple_seq stmts = NULL;
@@ -8759,7 +8779,7 @@ vectorizable_store (vec_info *vinfo,
              unsigned elt_offset = (j % factor) * const_nunits;
              tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
-             tree scale = size_int (gs_info.scale);
+             tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
              tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
              for (unsigned k = 0; k < const_nunits; ++k)
                {
@@ -9238,7 +9258,6 @@ vectorizable_load (vec_info *vinfo,
   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   poly_uint64 vf;
   tree aggr_type;
-  gather_scatter_info gs_info;
   tree ref_type;
   enum vect_def_type mask_dt = vect_unknown_def_type;
   enum vect_def_type els_dt = vect_unknown_def_type;
@@ -9372,20 +9391,24 @@ vectorizable_load (vec_info *vinfo,
   else
     group_size = 1;
 
-  vect_memory_access_type memory_access_type;
-  enum dr_alignment_support alignment_support_scheme;
-  int misalignment;
-  poly_int64 poffset;
-  internal_fn lanes_ifn;
-  auto_vec<int> elsvals;
-  int maskload_elsval = 0;
-  bool need_zeroing = false;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
-                            VLS_LOAD, &memory_access_type, &poffset,
-                            &alignment_support_scheme, &misalignment, &gs_info,
-                            &lanes_ifn, &elsvals))
+  vect_load_store_data _ls_data;
+  vect_load_store_data &ls = slp_node->get_data (_ls_data);
+  if (cost_vec
+      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+                               VLS_LOAD, &ls))
     return false;
+  /* Temporary aliases to analysis data, should not be modified through
+     these.  */
+  const vect_memory_access_type memory_access_type = ls.memory_access_type;
+  const dr_alignment_support alignment_support_scheme
+    = ls.alignment_support_scheme;
+  const int misalignment = ls.misalignment;
+  const poly_int64 poffset = ls.poffset;
+  const internal_fn lanes_ifn = ls.lanes_ifn;
+  const vec<int> &elsvals = ls.elsvals;
+  int maskload_elsval = 0;
+  bool need_zeroing = false;
 
   /* We might need to explicitly zero inactive elements if there are
      padding bits in the type that might leak otherwise.
@@ -9456,7 +9479,7 @@ vectorizable_load (vec_info *vinfo,
          if (!VECTOR_MODE_P (vec_mode)
              || !can_vec_mask_load_store_p (vec_mode,
                                             TYPE_MODE (mask_vectype),
-                                            true, NULL, &elsvals))
+                                            true, NULL, &ls.elsvals))
            return false;
        }
       else if (memory_access_type != VMAT_LOAD_STORE_LANES
@@ -9498,14 +9521,11 @@ vectorizable_load (vec_info *vinfo,
          return false;
        }
 
-      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
-
       if (loop_vinfo
          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
-                                             VLS_LOAD, group_size,
-                                             memory_access_type, &gs_info,
-                                             mask_node, &elsvals);
+                                             VLS_LOAD, group_size, &ls,
+                                             mask_node, &ls.elsvals);
 
       if (dump_enabled_p ()
          && memory_access_type != VMAT_ELEMENTWISE
@@ -9520,16 +9540,7 @@ vectorizable_load (vec_info *vinfo,
        vinfo->any_known_not_updated_vssa = true;
 
       SLP_TREE_TYPE (slp_node) = load_vec_info_type;
-    }
-  else
-    {
-      /* Here just get the else values.  */
-      if (loop_vinfo
-         && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-       check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
-                                             VLS_LOAD, group_size,
-                                             memory_access_type, &gs_info,
-                                             mask_node, &elsvals);
+      slp_node->data = new vect_load_store_data (std::move (ls));
     }
 
   /* If the type needs padding we must zero inactive elements.
@@ -9552,8 +9563,6 @@ vectorizable_load (vec_info *vinfo,
   if (elsvals.length ())
     maskload_elsval = *elsvals.begin ();
 
-  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
-
   if (dump_enabled_p () && !costing_p)
     dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
@@ -9722,6 +9731,13 @@ vectorizable_load (vec_info *vinfo,
       tree ltype = TREE_TYPE (vectype);
       tree lvectype = vectype;
       auto_vec<tree> dr_chain;
+      /* ??? Modify local copies of alignment_support_scheme and
+         misalignment, but this part of analysis should be done
+         earlier and remembered, likewise the chosen load mode.  */
+      const dr_alignment_support tem = alignment_support_scheme;
+      dr_alignment_support alignment_support_scheme = tem;
+      const int tem2 = misalignment;
+      int misalignment = tem2;
       if (memory_access_type == VMAT_STRIDED_SLP)
        {
          HOST_WIDE_INT n = gcd (group_size, const_nunits);
@@ -10344,7 +10360,7 @@ vectorizable_load (vec_info *vinfo,
       aggr_type = NULL_TREE;
       bump = NULL_TREE;
       if (!costing_p)
-       vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr,
+       vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
                                     &vec_offsets);
     }
   else
@@ -10352,8 +10368,9 @@ vectorizable_load (vec_info *vinfo,
       aggr_type = elem_type;
       if (!costing_p)
        {
-         vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
-                                          gsi, &gs_info,
+         vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
+                                          ls.strided_offset_vectype,
+                                          loop_vinfo, gsi,
                                           &bump, &vec_offset, loop_lens);
          dataref_ptr
            = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
@@ -10402,9 +10419,9 @@ vectorizable_load (vec_info *vinfo,
              if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
                vec_offset = vec_offsets[i];
              tree zero = build_zero_cst (vectype);
-             tree scale = size_int (gs_info.scale);
+             tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
 
-             if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+             if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
@@ -10476,13 +10493,14 @@ vectorizable_load (vec_info *vinfo,
                                                slp_node, 0, vect_body);
                  continue;
                }
-             poly_uint64 offset_nunits
-               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
+             poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
              if (known_eq (nunits, offset_nunits))
                {
                  new_stmt = vect_build_one_gather_load_call
-                   (vinfo, stmt_info, vectype, gsi, &gs_info,
-                    dataref_ptr, vec_offsets[i], final_mask);
+                   (vinfo, stmt_info, slp_node, vectype, gsi,
+                    ls.gs.decl, dataref_ptr, vec_offsets[i],
+                    final_mask);
                  data_ref = NULL_TREE;
                }
              else if (known_eq (nunits, offset_nunits * 2))
@@ -10491,8 +10509,9 @@ vectorizable_load (vec_info *vinfo,
                     lanes but the builtins will produce full vectype data
                     with just the lower lanes filled.  */
                  new_stmt = vect_build_one_gather_load_call
-                   (vinfo, stmt_info, vectype, gsi, &gs_info,
-                    dataref_ptr, vec_offsets[2 * i], final_mask);
+                   (vinfo, stmt_info, slp_node, vectype, gsi,
+                    ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
+                    final_mask);
                  tree low = make_ssa_name (vectype);
                  gimple_set_lhs (new_stmt, low);
                  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
@@ -10523,15 +10542,15 @@ vectorizable_load (vec_info *vinfo,
                                                   VEC_UNPACK_HI_EXPR,
                                                   final_mask);
                      final_mask = make_ssa_name
-                                    (truth_type_for (gs_info.offset_vectype));
+                                    (truth_type_for (offset_vectype));
                      gimple_set_lhs (new_stmt, final_mask);
                      vect_finish_stmt_generation (vinfo, stmt_info,
                                                   new_stmt, gsi);
                    }
                  new_stmt = vect_build_one_gather_load_call
-                   (vinfo, stmt_info, vectype, gsi, &gs_info,
-                    dataref_ptr,
+                   (vinfo, stmt_info, slp_node, vectype, gsi,
+                    ls.gs.decl, dataref_ptr,
                     vec_offsets[2 * i + 1], final_mask);
                  tree high = make_ssa_name (vectype);
                  gimple_set_lhs (new_stmt, high);
@@ -10574,7 +10593,8 @@ vectorizable_load (vec_info *vinfo,
                                                   new_stmt, gsi);
                    }
                  new_stmt = vect_build_one_gather_load_call
-                   (vinfo, stmt_info, vectype, gsi, &gs_info,
+                   (vinfo, stmt_info, slp_node, vectype, gsi,
+                    ls.gs.decl,
                     dataref_ptr, vec_offset, final_mask);
                  data_ref = NULL_TREE;
                }
@@ -10603,8 +10623,9 @@ vectorizable_load (vec_info *vinfo,
                                                slp_node, 0, vect_body);
                  continue;
                }
+             tree offset_vectype = TREE_TYPE (vec_offsets[0]);
              unsigned HOST_WIDE_INT const_offset_nunits
-               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
+               = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
              vec<constructor_elt, va_gc> *ctor_elts;
              vec_alloc (ctor_elts, const_nunits);
              gimple_seq stmts = NULL;
@@ -10615,7 +10636,7 @@ vectorizable_load (vec_info *vinfo,
              vec_offset = vec_offsets[i / factor];
              unsigned elt_offset = (i % factor) * const_nunits;
              tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
-             tree scale = size_int (gs_info.scale);
+             tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
              tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
              for (unsigned k = 0; k < const_nunits; ++k)
                {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 3d8a946..b422971 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -270,6 +270,26 @@ struct vect_simd_clone_data : vect_data {
   auto_vec<tree> simd_clone_info;
 };
 
+/* Analysis data from vectorizable_load and vectorizable_store for
+   load_vec_info_type and store_vec_info_type.  */
+struct vect_load_store_data : vect_data {
+  vect_load_store_data (vect_load_store_data &&other) = default;
+  vect_load_store_data () = default;
+  virtual ~vect_load_store_data () = default;
+
+  vect_memory_access_type memory_access_type;
+  dr_alignment_support alignment_support_scheme;
+  int misalignment;
+  internal_fn lanes_ifn;        // VMAT_LOAD_STORE_LANES
+  poly_int64 poffset;
+  union {
+    internal_fn ifn;            // VMAT_GATHER_SCATTER_IFN
+    tree decl;                  // VMAT_GATHER_SCATTER_DECL
+  } gs;
+  tree strided_offset_vectype;  // VMAT_GATHER_SCATTER_IFN, originally strided
+  auto_vec<int> elsvals;
+};
+
 /* A computation tree of an SLP instance.  Each node corresponds to a group
    of stmts to be packed in a SIMD stmt.  */
 struct _slp_tree {
@@ -332,10 +352,6 @@ struct _slp_tree {
 
   int vertex;
 
-  /* Classifies how the load or store is going to be implemented
-     for loop vectorization.  */
-  vect_memory_access_type memory_access_type;
-
   /* The kind of operation as determined by analysis and optional
      kind specific data.  */
   enum stmt_vec_info_type type;
@@ -427,12 +443,20 @@ public:
 #define SLP_TREE_REPRESENTATIVE(S)               (S)->representative
 #define SLP_TREE_LANES(S)                        (S)->lanes
 #define SLP_TREE_CODE(S)                         (S)->code
-#define SLP_TREE_MEMORY_ACCESS_TYPE(S)           (S)->memory_access_type
 #define SLP_TREE_TYPE(S)                         (S)->type
 #define SLP_TREE_GS_SCALE(S)                     (S)->gs_scale
 #define SLP_TREE_GS_BASE(S)                      (S)->gs_base
 
 #define SLP_TREE_PERMUTE_P(S)                    ((S)->code == VEC_PERM_EXPR)
 
+inline vect_memory_access_type
+SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
+{
+  if (SLP_TREE_TYPE (node) == load_vec_info_type
+      || SLP_TREE_TYPE (node) == store_vec_info_type)
+    return static_cast<vect_load_store_data *> (node->data)->memory_access_type;
+  return VMAT_UNINITIALIZED;
+}
+
 enum vect_partial_vector_style {
     vect_partial_vectors_none,
     vect_partial_vectors_while_ult,