-rw-r--r--  gcc/internal-fn.cc         |   7
-rw-r--r--  gcc/tree-vect-data-refs.cc |  61
-rw-r--r--  gcc/tree-vect-loop.cc      |  11
-rw-r--r--  gcc/tree-vect-slp.cc       |   2
-rw-r--r--  gcc/tree-vect-stmts.cc     | 141
-rw-r--r--  gcc/tree-vectorizer.h      |   4
6 files changed, 163 insertions, 63 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 4f2b20a..cc1ede5 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4578,6 +4578,7 @@ internal_load_fn_p (internal_fn fn)
     case IFN_MASK_LOAD:
     case IFN_LOAD_LANES:
     case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_LEN_LOAD_LANES:
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_LEN_GATHER_LOAD:
@@ -4600,6 +4601,7 @@ internal_store_fn_p (internal_fn fn)
     case IFN_MASK_STORE:
     case IFN_STORE_LANES:
     case IFN_MASK_STORE_LANES:
+    case IFN_MASK_LEN_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_SCATTER_STORE:
@@ -4672,6 +4674,8 @@ internal_fn_len_index (internal_fn fn)
     case IFN_COND_LEN_NEG:
     case IFN_MASK_LEN_LOAD:
     case IFN_MASK_LEN_STORE:
+    case IFN_MASK_LEN_LOAD_LANES:
+    case IFN_MASK_LEN_STORE_LANES:
       return 3;
 
     default:
@@ -4689,8 +4693,10 @@ internal_fn_mask_index (internal_fn fn)
     {
     case IFN_MASK_LOAD:
     case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_LEN_LOAD_LANES:
     case IFN_MASK_STORE:
     case IFN_MASK_STORE_LANES:
+    case IFN_MASK_LEN_STORE_LANES:
     case IFN_MASK_LEN_LOAD:
     case IFN_MASK_LEN_STORE:
       return 2;
@@ -4726,6 +4732,7 @@ internal_fn_stored_value_index (internal_fn fn)
       return 4;
 
     case IFN_MASK_LEN_STORE:
+    case IFN_MASK_LEN_STORE_LANES:
       return 5;
 
     default:
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a3570c4..3e9a284 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5438,22 +5438,31 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
   return false;
 }
 
+/* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors
+   of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
-/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
-   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
-
-bool
+internal_fn
 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
                             bool masked_p)
 {
-  if (masked_p)
-    return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
-                                         vec_mask_store_lanes_optab,
-                                         vectype, count);
+  if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
+                                    vec_mask_len_store_lanes_optab, vectype,
+                                    count))
+    return IFN_MASK_LEN_STORE_LANES;
+  else if (masked_p)
+    {
+      if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
+                                        vec_mask_store_lanes_optab, vectype,
+                                        count))
+        return IFN_MASK_STORE_LANES;
+    }
   else
-    return vect_lanes_optab_supported_p ("vec_store_lanes",
-                                         vec_store_lanes_optab,
-                                         vectype, count);
+    {
+      if (vect_lanes_optab_supported_p ("vec_store_lanes",
+                                        vec_store_lanes_optab, vectype, count))
+        return IFN_STORE_LANES;
+    }
+  return IFN_LAST;
 }
 
@@ -6056,21 +6065,31 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
   return false;
 }
 
-/* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
-   type VECTYPE.  MASKED_P says whether the masked form is needed. */
+/* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
+   of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
-bool
+internal_fn
 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
                            bool masked_p)
 {
-  if (masked_p)
-    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
-                                         vec_mask_load_lanes_optab,
-                                         vectype, count);
+  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
+                                    vec_mask_len_load_lanes_optab, vectype,
+                                    count))
+    return IFN_MASK_LEN_LOAD_LANES;
+  else if (masked_p)
+    {
+      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
+                                        vec_mask_load_lanes_optab, vectype,
+                                        count))
+        return IFN_MASK_LOAD_LANES;
+    }
   else
-    return vect_lanes_optab_supported_p ("vec_load_lanes",
-                                         vec_load_lanes_optab,
-                                         vectype, count);
+    {
+      if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
+                                        vectype, count))
+        return IFN_LOAD_LANES;
+    }
+  return IFN_LAST;
 }
 
 /* Function vect_permute_load_chain.
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index bc3063c..1fcd8d0 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2839,7 +2839,8 @@ start_over:
          instructions record it and move on to the next instance.  */
       if (loads_permuted
           && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
-          && vect_store_lanes_supported (vectype, group_size, false))
+          && vect_store_lanes_supported (vectype, group_size, false)
+               != IFN_LAST)
         {
           FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
             {
@@ -2848,9 +2849,9 @@ start_over:
               /* Use SLP for strided accesses (or if we can't
                  load-lanes).  */
               if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-                  || ! vect_load_lanes_supported
+                  || vect_load_lanes_supported
                         (STMT_VINFO_VECTYPE (stmt_vinfo),
-                         DR_GROUP_SIZE (stmt_vinfo), false))
+                         DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
                 break;
             }
@@ -3153,7 +3154,7 @@ again:
           vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
           unsigned int size = DR_GROUP_SIZE (vinfo);
           tree vectype = STMT_VINFO_VECTYPE (vinfo);
-          if (! vect_store_lanes_supported (vectype, size, false)
+          if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST
              && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
              && ! vect_grouped_store_supported (vectype, size))
             return opt_result::failure_at (vinfo->stmt,
@@ -3165,7 +3166,7 @@ again:
           bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
           size = DR_GROUP_SIZE (vinfo);
           vectype = STMT_VINFO_VECTYPE (vinfo);
-          if (! vect_load_lanes_supported (vectype, size, false)
+          if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
               && ! vect_grouped_load_supported (vectype, single_element_p,
                                                 size))
             return opt_result::failure_at (vinfo->stmt,
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 7020bd9..cf1f8f9 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3094,7 +3094,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
   if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
       || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
     return false;
-  return vect_store_lanes_supported (vectype, group_size, false);
+  return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
 }
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 86d033a..cd8e0a7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1610,9 +1610,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
   bool is_load = (vls_type == VLS_LOAD);
   if (memory_access_type == VMAT_LOAD_STORE_LANES)
     {
-      if (is_load
-          ? !vect_load_lanes_supported (vectype, group_size, true)
-          : !vect_store_lanes_supported (vectype, group_size, true))
+      internal_fn ifn
+        = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
+                   : vect_store_lanes_supported (vectype, group_size, true));
+      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
+        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
+        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
+                               scalar_mask);
+      else
         {
           if (dump_enabled_p ())
             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1620,10 +1626,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                              " the target doesn't have an appropriate"
                              " load/store-lanes instruction.\n");
           LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
-          return;
         }
-      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
-                             scalar_mask);
       return;
     }
@@ -2074,7 +2077,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                            poly_int64 *poffset,
                            dr_alignment_support *alignment_support_scheme,
                            int *misalignment,
-                           gather_scatter_info *gs_info)
+                           gather_scatter_info *gs_info,
+                           internal_fn *lanes_ifn)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2272,24 +2276,30 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 
       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
         ;
-      /* Otherwise try using LOAD/STORE_LANES.  */
-      else if (vls_type == VLS_LOAD
-               ? vect_load_lanes_supported (vectype, group_size, masked_p)
-               : vect_store_lanes_supported (vectype, group_size,
-                                             masked_p))
+      else
         {
-          *memory_access_type = VMAT_LOAD_STORE_LANES;
-          overrun_p = would_overrun_p;
-        }
+          /* Otherwise try using LOAD/STORE_LANES.  */
+          *lanes_ifn
+            = vls_type == VLS_LOAD
+                ? vect_load_lanes_supported (vectype, group_size, masked_p)
+                : vect_store_lanes_supported (vectype, group_size,
+                                              masked_p);
+          if (*lanes_ifn != IFN_LAST)
+            {
+              *memory_access_type = VMAT_LOAD_STORE_LANES;
+              overrun_p = would_overrun_p;
+            }
 
-      /* If that fails, try using permuting loads.  */
-      else if (vls_type == VLS_LOAD
-               ? vect_grouped_load_supported (vectype, single_element_p,
-                                              group_size)
-               : vect_grouped_store_supported (vectype, group_size))
-        {
-          *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
-          overrun_p = would_overrun_p;
+          /* If that fails, try using permuting loads.  */
+          else if (vls_type == VLS_LOAD
+                     ? vect_grouped_load_supported (vectype,
+                                                    single_element_p,
+                                                    group_size)
+                     : vect_grouped_store_supported (vectype, group_size))
+            {
+              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+              overrun_p = would_overrun_p;
+            }
         }
     }
@@ -2378,7 +2388,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                      poly_int64 *poffset,
                      dr_alignment_support *alignment_support_scheme,
                      int *misalignment,
-                     gather_scatter_info *gs_info)
+                     gather_scatter_info *gs_info,
+                     internal_fn *lanes_ifn)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -2441,7 +2452,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                       masked_p,
                                       vls_type, memory_access_type, poffset,
                                       alignment_support_scheme,
-                                      misalignment, gs_info))
+                                      misalignment, gs_info, lanes_ifn))
         return false;
     }
   else if (STMT_VINFO_STRIDED_P (stmt_info))
@@ -3087,11 +3098,8 @@ vect_get_loop_variant_data_ptr_increment (
   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
   tree step = vect_dr_behavior (vinfo, dr_info)->step;
 
-  /* TODO: We don't support gather/scatter or load_lanes/store_lanes for pointer
-     IVs are updated by variable amount but we will support them in the future.
-   */
-  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
-              && memory_access_type != VMAT_LOAD_STORE_LANES);
+  /* gather/scatter never reach here.  */
+  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
 
   /* When we support SELECT_VL pattern, we dynamic adjust
      the memory address by .SELECT_VL result.
@@ -8094,9 +8102,11 @@ vectorizable_store (vec_info *vinfo,
   enum dr_alignment_support alignment_support_scheme;
   int misalignment;
   poly_int64 poffset;
+  internal_fn lanes_ifn;
   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
                             ncopies, &memory_access_type, &poffset,
-                            &alignment_support_scheme, &misalignment, &gs_info))
+                            &alignment_support_scheme, &misalignment, &gs_info,
+                            &lanes_ifn))
     return false;
 
   if (mask)
@@ -8885,6 +8895,8 @@ vectorizable_store (vec_info *vinfo,
             }
 
           tree final_mask = NULL;
+          tree final_len = NULL;
+          tree bias = NULL;
          if (loop_masks)
            final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
                                             ncopies, vectype, j);
@@ -8892,8 +8904,37 @@ vectorizable_store (vec_info *vinfo,
            final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
                                           vec_mask, gsi);
 
+          if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
+            {
+              if (loop_lens)
+                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                               ncopies, vectype, j, 1);
+              else
+                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+              signed char biasval
+                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+              bias = build_int_cst (intQI_type_node, biasval);
+              if (!final_mask)
+                {
+                  mask_vectype = truth_type_for (vectype);
+                  final_mask = build_minus_one_cst (mask_vectype);
+                }
+            }
+
           gcall *call;
-          if (final_mask)
+          if (final_len && final_mask)
+            {
+              /* Emit:
+                   MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
+                                         LEN, BIAS, VEC_ARRAY).  */
+              unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
+              tree alias_ptr = build_int_cst (ref_type, align);
+              call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
+                                                 dataref_ptr, alias_ptr,
+                                                 final_mask, final_len, bias,
+                                                 vec_array);
+            }
+          else if (final_mask)
             {
               /* Emit:
                    MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
@@ -9598,9 +9639,11 @@ vectorizable_load (vec_info *vinfo,
   enum dr_alignment_support alignment_support_scheme;
   int misalignment;
   poly_int64 poffset;
+  internal_fn lanes_ifn;
   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
                             ncopies, &memory_access_type, &poffset,
-                            &alignment_support_scheme, &misalignment, &gs_info))
+                            &alignment_support_scheme, &misalignment, &gs_info,
+                            &lanes_ifn))
     return false;
 
   if (mask)
@@ -10386,6 +10429,8 @@ vectorizable_load (vec_info *vinfo,
           tree vec_array = create_vector_array (vectype, vec_num);
 
           tree final_mask = NULL_TREE;
+          tree final_len = NULL_TREE;
+          tree bias = NULL_TREE;
           if (loop_masks)
             final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
                                              ncopies, vectype, j);
@@ -10393,8 +10438,36 @@ vectorizable_load (vec_info *vinfo,
             final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
                                            vec_mask, gsi);
 
+          if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
+            {
+              if (loop_lens)
+                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                               ncopies, vectype, j, 1);
+              else
+                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+              signed char biasval
+                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+              bias = build_int_cst (intQI_type_node, biasval);
+              if (!final_mask)
+                {
+                  mask_vectype = truth_type_for (vectype);
+                  final_mask = build_minus_one_cst (mask_vectype);
+                }
+            }
+
           gcall *call;
-          if (final_mask)
+          if (final_len && final_mask)
+            {
+              /* Emit:
+                   VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
+                                                    VEC_MASK, LEN, BIAS).  */
+              unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
+              tree alias_ptr = build_int_cst (ref_type, align);
+              call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
+                                                 dataref_ptr, alias_ptr,
+                                                 final_mask, final_len, bias);
+            }
+          else if (final_mask)
             {
               /* Emit:
                    VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1de1449..53a3d78 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2297,9 +2297,9 @@ extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
 extern void vect_copy_ref_info (tree, tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
-extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
-extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
 extern void vect_permute_store_chain (vec_info *, vec<tree> &,
                                       unsigned int, stmt_vec_info,
                                       gimple_stmt_iterator *, vec<tree> *);
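
For context, a sketch of the kind of loop these hunks target (illustrative only, not part of the patch): a grouped, interleaved access that the vectorizer implements with LOAD_LANES/STORE_LANES. On a target that provides the new vec_mask_len_load_lanes/vec_mask_len_store_lanes optabs, such a loop can now be vectorized with .MASK_LEN_LOAD_LANES/.MASK_LEN_STORE_LANES under length-plus-bias control, using the operand layout the internal-fn.cc hunks register (mask at index 2, length at index 3, stored value at index 5). The function name and constants below are made up for illustration.

/* Illustrative example only.  A group-size-2 interleaved access; with the
   new optabs the vectorized body can use calls of the (schematic) form:
     vec_array = .MASK_LEN_LOAD_LANES (ptr, align, mask, len, bias);
     .MASK_LEN_STORE_LANES (ptr, align, mask, len, bias, vec_array);
   instead of requiring a full-width access plus scalar epilogue.  */
void
interleave_add (int *__restrict dst, int *__restrict src, int n)
{
  for (int i = 0; i < n; ++i)
    {
      dst[2 * i] = src[2 * i] + 1;          /* even lane */
      dst[2 * i + 1] = src[2 * i + 1] + 2;  /* odd lane */
    }
}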