diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-13 17:57:57 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-13 17:57:57 +0000 |
commit | 7e11fc7f5cecffe650b672ac1af212d4bd9f1335 (patch) | |
tree | bdbe6dcf4a77cd6837eba958b6c08cef87276a2f /gcc/internal-fn.c | |
parent | abc8eb9a45654662092ce1b6d452c13ee80be954 (diff) | |
download | gcc-7e11fc7f5cecffe650b672ac1af212d4bd9f1335.zip gcc-7e11fc7f5cecffe650b672ac1af212d4bd9f1335.tar.gz gcc-7e11fc7f5cecffe650b672ac1af212d4bd9f1335.tar.bz2 |
Add support for masked load/store_lanes
This patch adds support for vectorising groups of IFN_MASK_LOADs
and IFN_MASK_STOREs using conditional load/store-lanes instructions.
This requires new internal functions to represent the result
(IFN_MASK_{LOAD,STORE}_LANES), as well as associated optabs.
The normal IFN_{LOAD,STORE}_LANES functions are const operations
that logically just perform the permute: the load or store is
encoded as a MEM operand to the call statement. In contrast,
the IFN_MASK_{LOAD,STORE}_LANES functions use the same kind of
interface as IFN_MASK_{LOAD,STORE}, since the memory is only
conditionally accessed.
The AArch64 patterns were added as part of the main LD[234]/ST[234] patch.
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/md.texi (vec_mask_load_lanes@var{m}@var{n}): Document.
(vec_mask_store_lanes@var{m}@var{n}): Likewise.
* optabs.def (vec_mask_load_lanes_optab): New optab.
(vec_mask_store_lanes_optab): Likewise.
* internal-fn.def (MASK_LOAD_LANES): New internal function.
(MASK_STORE_LANES): Likewise.
* internal-fn.c (mask_load_lanes_direct): New macro.
(mask_store_lanes_direct): Likewise.
(expand_mask_load_optab_fn): Handle masked operations.
(expand_mask_load_lanes_optab_fn): New macro.
(expand_mask_store_optab_fn): Handle masked operations.
(expand_mask_store_lanes_optab_fn): New macro.
(direct_mask_load_lanes_optab_supported_p): Likewise.
(direct_mask_store_lanes_optab_supported_p): Likewise.
* tree-vectorizer.h (vect_store_lanes_supported): Take a masked_p
parameter.
(vect_load_lanes_supported): Likewise.
* tree-vect-data-refs.c (strip_conversion): New function.
(can_group_stmts_p): Likewise.
(vect_analyze_data_ref_accesses): Use it instead of checking
for a pair of assignments.
(vect_store_lanes_supported): Take a masked_p parameter.
(vect_load_lanes_supported): Likewise.
* tree-vect-loop.c (vect_analyze_loop_2): Update calls to
vect_store_lanes_supported and vect_load_lanes_supported.
* tree-vect-slp.c (vect_analyze_slp_instance): Likewise.
* tree-vect-stmts.c (get_group_load_store_type): Take a masked_p
parameter. Don't allow gaps for masked accesses.
Use vect_get_store_rhs. Update calls to vect_store_lanes_supported
and vect_load_lanes_supported.
(get_load_store_type): Take a masked_p parameter and update
call to get_group_load_store_type.
(vectorizable_store): Update call to get_load_store_type.
Handle IFN_MASK_STORE_LANES.
(vectorizable_load): Update call to get_load_store_type.
Handle IFN_MASK_LOAD_LANES.
gcc/testsuite/
* gcc.dg/vect/vect-ooo-group-1.c: New test.
* gcc.target/aarch64/sve/mask_struct_load_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_1_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_2_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_6.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_8.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_4.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256620
Diffstat (limited to 'gcc/internal-fn.c')
-rw-r--r-- | gcc/internal-fn.c | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 6483fe6..d9c7a16 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -82,8 +82,10 @@ init_internal_fns () #define not_direct { -2, -2, false } #define mask_load_direct { -1, 2, false } #define load_lanes_direct { -1, -1, false } +#define mask_load_lanes_direct { -1, -1, false } #define mask_store_direct { 3, 2, false } #define store_lanes_direct { 0, 0, false } +#define mask_store_lanes_direct { 0, 0, false } #define unary_direct { 0, 0, true } #define binary_direct { 0, 0, true } @@ -2408,7 +2410,7 @@ expand_LOOP_DIST_ALIAS (internal_fn, gcall *) gcc_unreachable (); } -/* Expand MASK_LOAD call STMT using optab OPTAB. */ +/* Expand MASK_LOAD{,_LANES} call STMT using optab OPTAB. */ static void expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) @@ -2417,6 +2419,7 @@ expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) tree type, lhs, rhs, maskt, ptr; rtx mem, target, mask; unsigned align; + insn_code icode; maskt = gimple_call_arg (stmt, 2); lhs = gimple_call_lhs (stmt); @@ -2429,6 +2432,12 @@ expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) type = build_aligned_type (type, align); rhs = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0), ptr); + if (optab == vec_mask_load_lanes_optab) + icode = get_multi_vector_move (type, optab); + else + icode = convert_optab_handler (optab, TYPE_MODE (type), + TYPE_MODE (TREE_TYPE (maskt))); + mem = expand_expr (rhs, NULL_RTX, VOIDmode, EXPAND_WRITE); gcc_assert (MEM_P (mem)); mask = expand_normal (maskt); @@ -2436,12 +2445,12 @@ expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) create_output_operand (&ops[0], target, TYPE_MODE (type)); create_fixed_operand (&ops[1], mem); create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); - expand_insn (convert_optab_handler (optab, TYPE_MODE (type), - TYPE_MODE (TREE_TYPE (maskt))), - 3, ops); + expand_insn (icode, 3, ops); } -/* Expand MASK_STORE call STMT using optab OPTAB. */ +#define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn + +/* Expand MASK_STORE{,_LANES} call STMT using optab OPTAB. */ static void expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) @@ -2450,6 +2459,7 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) tree type, lhs, rhs, maskt, ptr; rtx mem, reg, mask; unsigned align; + insn_code icode; maskt = gimple_call_arg (stmt, 2); rhs = gimple_call_arg (stmt, 3); @@ -2460,6 +2470,12 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) type = build_aligned_type (type, align); lhs = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0), ptr); + if (optab == vec_mask_store_lanes_optab) + icode = get_multi_vector_move (type, optab); + else + icode = convert_optab_handler (optab, TYPE_MODE (type), + TYPE_MODE (TREE_TYPE (maskt))); + mem = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); gcc_assert (MEM_P (mem)); mask = expand_normal (maskt); @@ -2467,11 +2483,11 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) create_fixed_operand (&ops[0], mem); create_input_operand (&ops[1], reg, TYPE_MODE (type)); create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); - expand_insn (convert_optab_handler (optab, TYPE_MODE (type), - TYPE_MODE (TREE_TYPE (maskt))), - 3, ops); + expand_insn (icode, 3, ops); } +#define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn + static void expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) { @@ -2871,8 +2887,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_binary_optab_supported_p direct_optab_supported_p #define direct_mask_load_optab_supported_p direct_optab_supported_p #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p +#define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p #define direct_mask_store_optab_supported_p direct_optab_supported_p #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p +#define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p /* Return the optab used by internal function FN. */ |