author     Richard Sandiford <richard.sandiford@linaro.org>    2018-01-13 17:57:57 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>            2018-01-13 17:57:57 +0000
commit     7e11fc7f5cecffe650b672ac1af212d4bd9f1335
tree       bdbe6dcf4a77cd6837eba958b6c08cef87276a2f  /gcc/tree-vect-data-refs.c
parent     abc8eb9a45654662092ce1b6d452c13ee80be954
Add support for masked load/store_lanes
This patch adds support for vectorising groups of IFN_MASK_LOADs
and IFN_MASK_STOREs using conditional load/store-lanes instructions.
This requires new internal functions to represent the result
(IFN_MASK_{LOAD,STORE}_LANES), as well as associated optabs.
The normal IFN_{LOAD,STORE}_LANES functions are const operations
that logically just perform the permute: the load or store is
encoded as a MEM operand to the call statement. In contrast,
the IFN_MASK_{LOAD,STORE}_LANES functions use the same kind of
interface as IFN_MASK_{LOAD,STORE}, since the memory is only
conditionally accessed.
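
As a rough illustration (a hand-written sketch, not one of the new tests
added below), the kind of source loop this targets is a conditional read
of interleaved data.  On a target such as AArch64 SVE, the two field
loads become IFN_MASK_LOADs with the same mask, and with this patch the
group can be implemented as a single IFN_MASK_LOAD_LANES (e.g. a
predicated LD2):

/* Sketch only: a conditional read of interleaved fields.  The two loads
   share one mask, so they can be grouped and vectorized as a masked
   load-lanes operation rather than two independent masked loads.  */
struct pair { int lo; int hi; };

void
sum_pairs (int *__restrict out, const struct pair *__restrict in,
           const int *__restrict cond, int n)
{
  for (int i = 0; i < n; ++i)
    if (cond[i])
      out[i] = in[i].lo + in[i].hi;
}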
The AArch64 patterns were added as part of the main LD[234]/ST[234] patch.
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/md.texi (vec_mask_load_lanes@var{m}@var{n}): Document.
(vec_mask_store_lanes@var{m}@var{n}): Likewise.
* optabs.def (vec_mask_load_lanes_optab): New optab.
(vec_mask_store_lanes_optab): Likewise.
* internal-fn.def (MASK_LOAD_LANES): New internal function.
(MASK_STORE_LANES): Likewise.
* internal-fn.c (mask_load_lanes_direct): New macro.
(mask_store_lanes_direct): Likewise.
(expand_mask_load_optab_fn): Handle masked operations.
(expand_mask_load_lanes_optab_fn): New macro.
(expand_mask_store_optab_fn): Handle masked operations.
(expand_mask_store_lanes_optab_fn): New macro.
(direct_mask_load_lanes_optab_supported_p): Likewise.
(direct_mask_store_lanes_optab_supported_p): Likewise.
* tree-vectorizer.h (vect_store_lanes_supported): Take a masked_p
parameter.
(vect_load_lanes_supported): Likewise.
* tree-vect-data-refs.c (strip_conversion): New function.
(can_group_stmts_p): Likewise.
(vect_analyze_data_ref_accesses): Use it instead of checking
for a pair of assignments.
(vect_store_lanes_supported): Take a masked_p parameter.
(vect_load_lanes_supported): Likewise.
* tree-vect-loop.c (vect_analyze_loop_2): Update calls to
vect_store_lanes_supported and vect_load_lanes_supported.
* tree-vect-slp.c (vect_analyze_slp_instance): Likewise.
* tree-vect-stmts.c (get_group_load_store_type): Take a masked_p
parameter. Don't allow gaps for masked accesses.
Use vect_get_store_rhs. Update calls to vect_store_lanes_supported
and vect_load_lanes_supported.
(get_load_store_type): Take a masked_p parameter and update
call to get_group_load_store_type.
(vectorizable_store): Update call to get_load_store_type.
Handle IFN_MASK_STORE_LANES.
(vectorizable_load): Update call to get_load_store_type.
Handle IFN_MASK_LOAD_LANES.
gcc/testsuite/
* gcc.dg/vect/vect-ooo-group-1.c: New test.
* gcc.target/aarch64/sve/mask_struct_load_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_1_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_2_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_6.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_8.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_4.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256620
Diffstat (limited to 'gcc/tree-vect-data-refs.c')
 gcc/tree-vect-data-refs.c | 95
 1 file changed, 81 insertions(+), 14 deletions(-)
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 759c1e3..23b1084 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2780,6 +2780,62 @@ dr_group_sort_cmp (const void *dra_, const void *drb_)
   return cmp;
 }
 
+/* If OP is the result of a conversion, return the unconverted value,
+   otherwise return null.  */
+
+static tree
+strip_conversion (tree op)
+{
+  if (TREE_CODE (op) != SSA_NAME)
+    return NULL_TREE;
+  gimple *stmt = SSA_NAME_DEF_STMT (op);
+  if (!is_gimple_assign (stmt)
+      || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
+    return NULL_TREE;
+  return gimple_assign_rhs1 (stmt);
+}
+
+/* Return true if vectorizable_* routines can handle statements STMT1
+   and STMT2 being in a single group.  */
+
+static bool
+can_group_stmts_p (gimple *stmt1, gimple *stmt2)
+{
+  if (gimple_assign_single_p (stmt1))
+    return gimple_assign_single_p (stmt2);
+
+  if (is_gimple_call (stmt1) && gimple_call_internal_p (stmt1))
+    {
+      /* Check for two masked loads or two masked stores.  */
+      if (!is_gimple_call (stmt2) || !gimple_call_internal_p (stmt2))
+	return false;
+      internal_fn ifn = gimple_call_internal_fn (stmt1);
+      if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
+	return false;
+      if (ifn != gimple_call_internal_fn (stmt2))
+	return false;
+
+      /* Check that the masks are the same.  Cope with casts of masks,
+	 like those created by build_mask_conversion.  */
+      tree mask1 = gimple_call_arg (stmt1, 2);
+      tree mask2 = gimple_call_arg (stmt2, 2);
+      if (!operand_equal_p (mask1, mask2, 0))
+	{
+	  mask1 = strip_conversion (mask1);
+	  if (!mask1)
+	    return false;
+	  mask2 = strip_conversion (mask2);
+	  if (!mask2)
+	    return false;
+	  if (!operand_equal_p (mask1, mask2, 0))
+	    return false;
+	}
+      return true;
+    }
+
+  return false;
+}
+
 /* Function vect_analyze_data_ref_accesses.
 
    Analyze the access pattern of all the data references in the loop.
@@ -2846,8 +2902,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
					DR_BASE_ADDRESS (drb)) != 0
	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
-	      || !gimple_assign_single_p (DR_STMT (dra))
-	      || !gimple_assign_single_p (DR_STMT (drb)))
+	      || !can_group_stmts_p (DR_STMT (dra), DR_STMT (drb)))
	    break;
 
	  /* Check that the data-refs have the same constant size.  */
@@ -4684,15 +4739,21 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 }
 
 
-/* Return TRUE if vec_store_lanes is available for COUNT vectors of
-   type VECTYPE.  */
+/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
+   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
 bool
-vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
+			    bool masked_p)
 {
-  return vect_lanes_optab_supported_p ("vec_store_lanes",
-				       vec_store_lanes_optab,
-				       vectype, count);
+  if (masked_p)
+    return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
+					 vec_mask_store_lanes_optab,
+					 vectype, count);
+  else
+    return vect_lanes_optab_supported_p ("vec_store_lanes",
+					 vec_store_lanes_optab,
+					 vectype, count);
 }
 
 
@@ -5283,15 +5344,21 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
   return false;
 }
 
-/* Return TRUE if vec_load_lanes is available for COUNT vectors of
-   type VECTYPE.  */
+/* Return TRUE if vec_{mask_}load_lanes is available for COUNT vectors of
+   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
 bool
-vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
+			   bool masked_p)
 {
-  return vect_lanes_optab_supported_p ("vec_load_lanes",
-				       vec_load_lanes_optab,
-				       vectype, count);
+  if (masked_p)
+    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
+					 vec_mask_load_lanes_optab,
+					 vectype, count);
+  else
+    return vect_lanes_optab_supported_p ("vec_load_lanes",
+					 vec_load_lanes_optab,
+					 vectype, count);
 }
 
 /* Function vect_permute_load_chain.
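
For the store side, a similar hand-written sketch (again illustrative, not
taken from the new testsuite files): the two conditional stores below become
IFN_MASK_STOREs with the same mask, which can_group_stmts_p above now allows
vect_analyze_data_ref_accesses to treat as a single group, and
vect_store_lanes_supported with masked_p set can then select the
vec_mask_store_lanes optab (e.g. a predicated ST2 on SVE).

/* Sketch only: a conditional write of interleaved fields.  Both stores
   are guarded by the same condition, so they share a mask and can be
   grouped into a masked store-lanes operation.  */
struct pair { int lo; int hi; };

void
split_pairs (struct pair *__restrict out, const int *__restrict a,
             const int *__restrict b, const int *__restrict cond, int n)
{
  for (int i = 0; i < n; ++i)
    if (cond[i])
      {
        out[i].lo = a[i];
        out[i].hi = b[i];
      }
}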