diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2019-10-25 08:22:13 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2019-10-25 08:22:13 +0000 |
commit | 89d0345ad7b8d84045813972ee60557a6b511c57 (patch) | |
tree | 679d68b4d491389b86c0d17c7a69516757266ecd /gcc | |
parent | ea133b14f48ed5730748a7e02e322fb07ccc2d85 (diff) | |
download | gcc-89d0345ad7b8d84045813972ee60557a6b511c57.zip gcc-89d0345ad7b8d84045813972ee60557a6b511c57.tar.gz gcc-89d0345ad7b8d84045813972ee60557a6b511c57.tar.bz2 |
Fix reductions for fully-masked loops
Now that vectorizable_operation vectorises most loop stmts involved
in a reduction, it needs to be aware of reductions in fully-masked loops.
The LOOP_VINFO_CAN_FULLY_MASK_P parts of vectorizable_reduction now only
apply to cases that use vect_transform_reduction.
This new approach is a definite improvement for SVE, since it lets us
lift the old restriction that fully-masked loops could not be used for
reduction chains.
2019-10-25 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-vect-loop.c (vectorizable_reduction): Restrict the
LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be
handled by vect_transform_reduction. Allow fully-masked loops
to be used with reduction chains.
* tree-vect-stmts.c (vectorizable_operation): Handle reduction
operations in fully-masked loops.
(vectorizable_condition): Reject EXTRACT_LAST_REDUCTION
operations in fully-masked loops.
gcc/testsuite/
* gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines
for FOLD_EXTRACT_LAST reductions.
* gcc.dg/vect/pr65947-2.c: Likewise.
* gcc.dg/vect/pr65947-3.c: Likewise.
* gcc.dg/vect/pr65947-4.c: Likewise.
* gcc.dg/vect/pr65947-5.c: Likewise.
* gcc.dg/vect/pr65947-6.c: Likewise.
* gcc.dg/vect/pr65947-9.c: Likewise.
* gcc.dg/vect/pr65947-10.c: Likewise.
* gcc.dg/vect/pr65947-12.c: Likewise.
* gcc.dg/vect/pr65947-13.c: Likewise.
* gcc.dg/vect/pr65947-14.c: Likewise.
* gcc.dg/vect/pr80631-1.c: Likewise.
* gcc.dg/vect/pr80631-2.c: Likewise.
* gcc.dg/vect/vect-cond-reduc-3.c: Likewise.
* gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
From-SVN: r277438
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-10.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-12.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-13.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-14.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-4.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-5.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-6.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-9.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr80631-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr80631-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c | 2 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 51 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 93 |
19 files changed, 144 insertions, 60 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bb64e93..89448b1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-10-25 Richard Sandiford <richard.sandiford@arm.com> + + * tree-vect-loop.c (vectorizable_reduction): Restrict the + LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be + handled by vect_transform_reduction. Allow fully-masked loops + to be used with reduction chains. + * tree-vect-stmts.c (vectorizable_operation): Handle reduction + operations in fully-masked loops. + (vectorizable_condition): Reject EXTRACT_LAST_REDUCTION + operations in fully-masked loops. + 2019-10-25 Richard Biener <rguenther@suse.de> * tree-vect-loop.c (vectorizable_reduction): Verify diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8e652ab..4a98f6c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2019-10-25 Richard Sandiford <richard.sandiford@arm.com> + + * gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines + for FOLD_EXTRACT_LAST reductions. + * gcc.dg/vect/pr65947-2.c: Likewise. + * gcc.dg/vect/pr65947-3.c: Likewise. + * gcc.dg/vect/pr65947-4.c: Likewise. + * gcc.dg/vect/pr65947-5.c: Likewise. + * gcc.dg/vect/pr65947-6.c: Likewise. + * gcc.dg/vect/pr65947-9.c: Likewise. + * gcc.dg/vect/pr65947-10.c: Likewise. + * gcc.dg/vect/pr65947-12.c: Likewise. + * gcc.dg/vect/pr65947-13.c: Likewise. + * gcc.dg/vect/pr65947-14.c: Likewise. + * gcc.dg/vect/pr80631-1.c: Likewise. + * gcc.dg/vect/pr80631-2.c: Likewise. + * gcc.dg/vect/vect-cond-reduc-3.c: Likewise. + * gcc.dg/vect/vect-cond-reduc-4.c: Likewise. + 2019-10-24 Jakub Jelinek <jakub@redhat.com> * c-c++-common/gomp/declare-variant-8.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c index b81baed..8ebc385 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c index f37aeca..e4a1d94 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c @@ -42,6 +42,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." 
"vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c index b84fd41..a47f414 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c @@ -42,5 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index 4ad5262..b0755c0 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -42,4 +42,4 @@ main (void) /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 
2 "vect" { xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c index d0194f2..c0df587 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c index 18d33c4..58ba5f7 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c @@ -42,5 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." 
"vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c index 427abdb..6b4077e 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c @@ -52,5 +52,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c index 4055710..99f9765 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c @@ -41,6 +41,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c index c91b648..4e3f765 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c @@ -53,5 +53,5 @@ main (void) /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! 
vect_fold_extract_last } } } } */ /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c index b072c8d..dde96d7 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c index e43e0e4..1f29530 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c @@ -48,5 +48,5 @@ main () /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */ /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! 
vect_fold_extract_last } } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c index b531fe6..f430deb 100644 --- a/gcc/testsuite/gcc.dg/vect/pr80631-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c @@ -72,5 +72,5 @@ main () } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c index 07f1a72..ca786f6 100644 --- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c @@ -73,4 +73,4 @@ main () /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 
5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c index a5b3849..de9921c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c @@ -40,6 +40,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c index 6b6d17f..543504f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c @@ -40,6 +40,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! 
vect_fold_extract_last } } } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index d0fd7bdb..3b58cee 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6319,38 +6319,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, else vec_num = 1; - internal_fn cond_fn = get_conditional_internal_fn (code); - vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); - bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in); - vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies, cost_vec); - if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) - { - if (reduction_type != FOLD_LEFT_REDUCTION - && !mask_by_cond_expr - && (cond_fn == IFN_LAST - || !direct_internal_fn_supported_p (cond_fn, vectype_in, - OPTIMIZE_FOR_SPEED))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because no" - " conditional operation is available.\n"); - LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; - } - else if (reduc_index == -1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop for chained" - " reductions.\n"); - LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; - } - else - vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, - vectype_in, NULL); - } if (dump_enabled_p () && reduction_type == FOLD_LEFT_REDUCTION) dump_printf_loc (MSG_NOTE, vect_location, @@ -6367,6 +6337,27 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def; } + else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) + { + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + internal_fn cond_fn = get_conditional_internal_fn (code); + + if (reduction_type != FOLD_LEFT_REDUCTION + && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in) + 
&& (cond_fn == IFN_LAST + || !direct_internal_fn_supported_p (cond_fn, vectype_in, + OPTIMIZE_FOR_SPEED))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because no" + " conditional operation is available.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else + vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, + vectype_in, NULL); + } return true; } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 02b95f51..19ac82f 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -5929,7 +5929,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, poly_uint64 nunits_in; poly_uint64 nunits_out; tree vectype_out; - int ncopies; + int ncopies, vec_num; int j, i; vec<tree> vec_oprnds0 = vNULL; vec<tree> vec_oprnds1 = vNULL; @@ -6066,9 +6066,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ if (slp_node) - ncopies = 1; + { + ncopies = 1; + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + } else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + { + ncopies = vect_get_num_copies (loop_vinfo, vectype); + vec_num = 1; + } gcc_assert (ncopies >= 1); @@ -6121,8 +6127,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, return false; } + int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); + vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); + internal_fn cond_fn = get_conditional_internal_fn (code); + if (!vec_stmt) /* transformation not required. */ { + /* If this operation is part of a reduction, a fully-masked loop + should only change the active lanes of the reduction chain, + keeping the inactive lanes as-is. 
*/ + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) + && reduc_idx >= 0) + { + if (cond_fn == IFN_LAST + || !direct_internal_fn_supported_p (cond_fn, vectype, + OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because no" + " conditional operation is available.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else + vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, + vectype, NULL); + } + STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); @@ -6135,6 +6167,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, dump_printf_loc (MSG_NOTE, vect_location, "transform binary/unary operation.\n"); + bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as vectors with unsigned elements, but the result is signed. So, we need to compute the MINUS_EXPR into vectype temporary and @@ -6252,22 +6286,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ? vec_oprnds1[i] : NULL_TREE); vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); - gassign *new_stmt = gimple_build_assign (vec_dest, code, - vop0, vop1, vop2); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - new_stmt_info - = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - if (vec_cvt_dest) + if (masked_loop_p && reduc_idx >= 0) { - new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); - gassign *new_stmt - = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, - new_temp); - new_temp = make_ssa_name (vec_cvt_dest, new_stmt); + /* Perform the operation on active elements only and take + inactive elements from the reduction chain input. 
*/ + gcc_assert (!vop2); + vop2 = reduc_idx == 1 ? vop1 : vop0; + tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, + vectype, i * ncopies + j); + gcall *call = gimple_build_call_internal (cond_fn, 4, mask, + vop0, vop1, vop2); + new_temp = make_ssa_name (vec_dest, call); + gimple_call_set_lhs (call, new_temp); + gimple_call_set_nothrow (call, true); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, call, gsi); + } + else + { + gassign *new_stmt = gimple_build_assign (vec_dest, code, + vop0, vop1, vop2); + new_temp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_temp); new_stmt_info = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + if (vec_cvt_dest) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); + gassign *new_stmt + = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_cvt_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + } } if (slp_node) SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); @@ -9997,6 +10050,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, return false; } } + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) + && reduction_type == EXTRACT_LAST_REDUCTION) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't yet use a fully-masked loop for" + " EXTRACT_LAST_REDUCTION.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } if (expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) { |