author     Richard Sandiford <richard.sandiford@linaro.org>   2018-01-13 18:01:34 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>           2018-01-13 18:01:34 +0000
commit     bfaa08b7ba1b00bbcc00bb76735c6b3547f5830f
tree       6fe04d13cf93a02e0cafba41edbff553c1e4de54 /gcc/tree-vect-stmts.c
parent     b781a135a06fc1805c072778d7513df09a32171d
Add support for SVE gather loads
This patch adds support for SVE gather loads. It uses basically the
same analysis code as the AVX gather support, but after that there
are two major differences:
- It uses new internal functions rather than target built-ins.
The interface is:
IFN_GATHER_LOAD (base, offsets, scale)
IFN_MASK_GATHER_LOAD (base, offsets, scale, mask)
which should be reasonably generic. One of the advantages of
using internal functions is that other passes can understand what
the functions do, but a more immediate advantage is that we can
query the underlying target pattern to see which scales it supports.
- It uses pattern recognition to convert the offset to the width the
target expects, if the offset was originally narrower than that.
This avoids having to do a widening operation as part of the gather
expansion itself. An example of the kind of loop this enables is
sketched below.
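The following sketch is purely illustrative; it is not taken from the patch or
from the new gather_load tests. It shows the kind of indexed load that the new
support lets the vectorizer implement as a gather, and, since the 32-bit index
is narrower than the 64-bit offsets a double-precision gather would want, it is
also assumed to be a case where the new offset-widening pattern applies.

  /* Hypothetical example of a loop that can now be vectorized with an
     SVE gather load: each iteration loads from src + index[i].  In a
     fully-masked loop the emitted call would be IFN_MASK_GATHER_LOAD
     rather than IFN_GATHER_LOAD.  */
  void
  gather_example (double *restrict dest, const double *restrict src,
                  const int *restrict index, int n)
  {
    for (int i = 0; i < n; ++i)
      dest[i] = src[index[i]];
  }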
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/md.texi (gather_load@var{m}): Document.
(mask_gather_load@var{m}): Likewise.
* genopinit.c (main): Add supports_vec_gather_load and
supports_vec_gather_load_cached to target_optabs.
* optabs-tree.c (init_tree_optimization_optabs): Use
ggc_cleared_alloc to allocate target_optabs.
* optabs.def (gather_load_optab, mask_gather_load_optab): New optabs.
* internal-fn.def (GATHER_LOAD, MASK_GATHER_LOAD): New internal
functions.
* internal-fn.h (internal_load_fn_p): Declare.
(internal_gather_scatter_fn_p): Likewise.
(internal_fn_mask_index): Likewise.
(internal_gather_scatter_fn_supported_p): Likewise.
* internal-fn.c (gather_load_direct): New macro.
(expand_gather_load_optab_fn): New function.
(direct_gather_load_optab_supported_p): New macro.
(direct_internal_fn_optab): New function.
(internal_load_fn_p): Likewise.
(internal_gather_scatter_fn_p): Likewise.
(internal_fn_mask_index): Likewise.
(internal_gather_scatter_fn_supported_p): Likewise.
* optabs-query.c (supports_at_least_one_mode_p): New function.
(supports_vec_gather_load_p): Likewise.
* optabs-query.h (supports_vec_gather_load_p): Declare.
* tree-vectorizer.h (gather_scatter_info): Add ifn, element_type
and memory_type fields.
(NUM_PATTERNS): Bump to 15.
* tree-vect-data-refs.c: Include internal-fn.h.
(vect_gather_scatter_fn_p): New function.
(vect_describe_gather_scatter_call): Likewise.
(vect_check_gather_scatter): Try using internal functions for
gather loads. Recognize existing calls to a gather load function.
(vect_analyze_data_refs): Consider using gather loads if
supports_vec_gather_load_p.
* tree-vect-patterns.c (vect_get_load_store_mask): New function.
(vect_get_gather_scatter_offset_type): Likewise.
(vect_convert_mask_for_vectype): Likewise.
(vect_add_conversion_to_patterm): Likewise.
(vect_try_gather_scatter_pattern): Likewise.
(vect_recog_gather_scatter_pattern): New pattern recognizer.
(vect_vect_recog_func_ptrs): Add it.
* tree-vect-stmts.c (exist_non_indexing_operands_for_use_p): Use
internal_fn_mask_index and internal_gather_scatter_fn_p.
(check_load_store_masking): Take the gather_scatter_info as an
argument and handle gather loads.
(vect_get_gather_scatter_ops): New function.
(vectorizable_call): Check internal_load_fn_p.
(vectorizable_load): Likewise. Handle gather load internal
functions.
(vectorizable_store): Update call to check_load_store_masking.
* config/aarch64/aarch64.md (UNSPEC_LD1_GATHER): New unspec.
* config/aarch64/iterators.md (SVE_S, SVE_D): New mode iterators.
* config/aarch64/predicates.md (aarch64_gather_scale_operand_w)
(aarch64_gather_scale_operand_d): New predicates.
* config/aarch64/aarch64-sve.md (gather_load<mode>): New expander.
(mask_gather_load<mode>): New insns.
gcc/testsuite/
* gcc.target/aarch64/sve/gather_load_1.c: New test.
* gcc.target/aarch64/sve/gather_load_2.c: Likewise.
* gcc.target/aarch64/sve/gather_load_3.c: Likewise.
* gcc.target/aarch64/sve/gather_load_4.c: Likewise.
* gcc.target/aarch64/sve/gather_load_5.c: Likewise.
* gcc.target/aarch64/sve/gather_load_6.c: Likewise.
* gcc.target/aarch64/sve/gather_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_1.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_5.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_6.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256640
Diffstat (limited to 'gcc/tree-vect-stmts.c')
 gcc/tree-vect-stmts.c | 142
 1 file changed, 116 insertions, 26 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e4d2051..a308d80 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -391,21 +391,19 @@ exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
     {
       if (is_gimple_call (stmt)
           && gimple_call_internal_p (stmt))
-        switch (gimple_call_internal_fn (stmt))
-          {
-          case IFN_MASK_STORE:
-            operand = gimple_call_arg (stmt, 3);
-            if (operand == use)
-              return true;
-            /* FALLTHRU */
-          case IFN_MASK_LOAD:
-            operand = gimple_call_arg (stmt, 2);
-            if (operand == use)
-              return true;
-            break;
-          default:
-            break;
-          }
+        {
+          internal_fn ifn = gimple_call_internal_fn (stmt);
+          int mask_index = internal_fn_mask_index (ifn);
+          if (mask_index >= 0
+              && use == gimple_call_arg (stmt, mask_index))
+            return true;
+          if (internal_gather_scatter_fn_p (ifn)
+              && use == gimple_call_arg (stmt, 1))
+            return true;
+          if (ifn == IFN_MASK_STORE
+              && use == gimple_call_arg (stmt, 3))
+            return true;
+        }
       return false;
     }
 
@@ -1727,6 +1725,8 @@ static tree permute_vec_elements (tree, tree, tree, gimple *,
    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
    says how the load or store is going to be implemented and GROUP_SIZE
    is the number of load or store statements in the containing group.
+   If the access is a gather load or scatter store, GS_INFO describes
+   its arguments.
 
    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
    supported, otherwise record the required mask types.  */
@@ -1734,7 +1734,8 @@ static tree permute_vec_elements (tree, tree, tree, gimple *,
 static void
 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                           vec_load_store_type vls_type, int group_size,
-                          vect_memory_access_type memory_access_type)
+                          vect_memory_access_type memory_access_type,
+                          gather_scatter_info *gs_info)
 {
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
@@ -1762,6 +1763,29 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
       return;
     }
 
+  if (memory_access_type == VMAT_GATHER_SCATTER)
+    {
+      gcc_assert (is_load);
+      tree offset_type = TREE_TYPE (gs_info->offset);
+      if (!internal_gather_scatter_fn_supported_p (IFN_MASK_GATHER_LOAD,
+                                                   vectype,
+                                                   gs_info->memory_type,
+                                                   TYPE_SIGN (offset_type),
+                                                   gs_info->scale))
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "can't use a fully-masked loop because the"
+                             " target doesn't have an appropriate masked"
+                             " gather load instruction.\n");
+          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+          return;
+        }
+      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
+      return;
+    }
+
   if (memory_access_type != VMAT_CONTIGUOUS
       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
     {
@@ -2563,6 +2587,31 @@ vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
     }
 }
 
+/* Prepare the base and offset in GS_INFO for vectorization.
+   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
+   to the vectorized offset argument for the first copy of STMT.  STMT
+   is the statement described by GS_INFO and LOOP is the containing loop.  */
+
+static void
+vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
+                             gather_scatter_info *gs_info,
+                             tree *dataref_ptr, tree *vec_offset)
+{
+  gimple_seq stmts = NULL;
+  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
+  if (stmts != NULL)
+    {
+      basic_block new_bb;
+      edge pe = loop_preheader_edge (loop);
+      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+      gcc_assert (!new_bb);
+    }
+  tree offset_type = TREE_TYPE (gs_info->offset);
+  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
+  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
+                                              offset_vectype);
+}
+
 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
 
 static bool
@@ -2751,7 +2800,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     return false;
 
   if (gimple_call_internal_p (stmt)
-      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
+      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
     /* Handled by vectorizable_load and vectorizable_store.  */
     return false;
@@ -5951,7 +6000,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
   if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
     check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
-                              memory_access_type);
+                              memory_access_type, &gs_info);
 
   STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
   /* The SLP costs are calculated during SLP analysis.  */
@@ -6932,7 +6981,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   else
     {
       gcall *call = dyn_cast <gcall *> (stmt);
-      if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
+      if (!call || !gimple_call_internal_p (call))
+        return false;
+
+      internal_fn ifn = gimple_call_internal_fn (call);
+      if (!internal_load_fn_p (ifn))
         return false;
 
       scalar_dest = gimple_call_lhs (call);
@@ -6947,9 +7000,13 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
           return false;
         }
 
-      mask = gimple_call_arg (call, 2);
-      if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
-        return false;
+      int mask_index = internal_fn_mask_index (ifn);
+      if (mask_index >= 0)
+        {
+          mask = gimple_call_arg (call, mask_index);
+          if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
+            return false;
+        }
     }
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -7073,7 +7130,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                                     TYPE_MODE (mask_vectype), true))
         return false;
     }
-  else if (memory_access_type == VMAT_GATHER_SCATTER)
+  else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
     {
       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
       tree masktype
@@ -7087,7 +7144,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
           return false;
         }
     }
-  else if (memory_access_type != VMAT_LOAD_STORE_LANES)
+  else if (memory_access_type != VMAT_LOAD_STORE_LANES
+           && memory_access_type != VMAT_GATHER_SCATTER)
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7104,7 +7162,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
         check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
-                                  memory_access_type);
+                                  memory_access_type, &gs_info);
 
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
       /* The SLP costs are calculated during SLP analysis.  */
@@ -7126,7 +7184,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
   ensure_base_align (dr);
 
-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
     {
       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
       return true;
@@ -7571,6 +7629,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     aggr_type = vectype;
 
   tree vec_mask = NULL_TREE;
+  tree vec_offset = NULL_TREE;
   prev_stmt_info = NULL;
   poly_uint64 group_elt = 0;
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
@@ -7613,6 +7672,12 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
             dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                            diff);
           }
+        else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+          {
+            vect_get_gather_scatter_ops (loop, stmt, &gs_info,
+                                         &dataref_ptr, &vec_offset);
+            inv_p = false;
+          }
         else
           dataref_ptr
             = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
@@ -7628,6 +7693,13 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (dataref_offset)
         dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                           TYPE_SIZE_UNIT (aggr_type));
+      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+        {
+          gimple *def_stmt;
+          vect_def_type dt;
+          vect_is_simple_use (vec_offset, loop_vinfo, &def_stmt, &dt);
+          vec_offset = vect_get_vec_def_for_stmt_copy (dt, vec_offset);
+        }
       else
         dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                        TYPE_SIZE_UNIT (aggr_type));
@@ -7716,6 +7788,24 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   {
                     unsigned int align, misalign;
 
+                    if (memory_access_type == VMAT_GATHER_SCATTER)
+                      {
+                        tree scale = size_int (gs_info.scale);
+                        gcall *call;
+                        if (masked_loop_p)
+                          call = gimple_build_call_internal
+                            (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
+                             vec_offset, scale, final_mask);
+                        else
+                          call = gimple_build_call_internal
+                            (IFN_GATHER_LOAD, 3, dataref_ptr,
+                             vec_offset, scale);
+                        gimple_call_set_nothrow (call, true);
+                        new_stmt = call;
+                        data_ref = NULL_TREE;
+                        break;
+                      }
+
                     align = DR_TARGET_ALIGNMENT (dr);
                     if (alignment_support_scheme == dr_aligned)
                       {
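As a reading aid for the final hunk above, here is a rough scalar model of the
two internal functions being emitted. It is only a sketch of the intended
semantics as described in this patch (lane i comes from base + offsets[i] *
scale, and the masked form only loads the active lanes); the names, types and
lane handling below are illustrative and not part of GCC.

  #include <stdint.h>

  /* Model of IFN_GATHER_LOAD (base, offsets, scale): lane i is read from
     base + offsets[i] * scale, with scale acting as a byte multiplier.  */
  void
  gather_load_model (double *result, const char *base,
                     const int64_t *offsets, int64_t scale, int lanes)
  {
    for (int i = 0; i < lanes; ++i)
      result[i] = *(const double *) (base + offsets[i] * scale);
  }

  /* Model of IFN_MASK_GATHER_LOAD (base, offsets, scale, mask): only
     lanes whose mask bit is set perform a load; this sketch simply
     leaves the other lanes untouched.  */
  void
  mask_gather_load_model (double *result, const char *base,
                          const int64_t *offsets, int64_t scale,
                          const unsigned char *mask, int lanes)
  {
    for (int i = 0; i < lanes; ++i)
      if (mask[i])
        result[i] = *(const double *) (base + offsets[i] * scale);
  }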