diff options
author | Alejandro Martinez <alejandro.martinezvicente@arm.com> | 2019-06-18 08:09:00 +0000 |
---|---|---|
committer | Alejandro Martinez <alejandro@gcc.gnu.org> | 2019-06-18 08:09:00 +0000 |
commit | bce29d65ebe1316d15ec7582a1d257ef1be163f7 (patch) | |
tree | 5e31c8b01fcf9fd667adb0a813ad377f34961fae /gcc/tree-vect-loop.c | |
parent | 9553f0d2216d4475d4d1afaa748b6b02f56c057b (diff) | |
download | gcc-bce29d65ebe1316d15ec7582a1d257ef1be163f7.zip gcc-bce29d65ebe1316d15ec7582a1d257ef1be163f7.tar.gz gcc-bce29d65ebe1316d15ec7582a1d257ef1be163f7.tar.bz2 |
[Vectorizer] Support masking fold left reductions
This patch adds support in the vectorizer for masking fold left reductions.
This avoids the need to insert a conditional assignment with some identity
value.
From-SVN: r272407
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 92a7c29..a27eda6 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5916,6 +5916,30 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest, return lhs; } +/* Get a masked internal function equivalent to REDUC_FN. VECTYPE_IN is the + type of the vector input. */ + +static internal_fn +get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in) +{ + internal_fn mask_reduc_fn; + + switch (reduc_fn) + { + case IFN_FOLD_LEFT_PLUS: + mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS; + break; + + default: + return IFN_LAST; + } + + if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in, + OPTIMIZE_FOR_SPEED)) + return mask_reduc_fn; + return IFN_LAST; +} + /* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the statement that sets the live-out value. REDUC_DEF_STMT is the phi statement. CODE is the operation performed by STMT_INFO and OPS are @@ -5938,6 +5962,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); stmt_vec_info new_stmt_info = NULL; + internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in); int ncopies; if (slp_node) @@ -6014,16 +6039,21 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, def0 = negated; } - if (mask) + if (mask && mask_reduc_fn == IFN_LAST) def0 = merge_with_identity (gsi, mask, vectype_out, def0, vector_identity); /* On the first iteration the input is simply the scalar phi result, and for subsequent iterations it is the output of the preceding operation. 
*/ - if (reduc_fn != IFN_LAST) + if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST)) { - new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0); + if (mask && mask_reduc_fn != IFN_LAST) + new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var, + def0, mask); + else + new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, + def0); /* For chained SLP reductions the output of the previous reduction operation serves as the input of the next. For the final statement the output cannot be a temporary - we reuse the original |