aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2019-10-25 08:22:13 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2019-10-25 08:22:13 +0000
commit: 89d0345ad7b8d84045813972ee60557a6b511c57 (patch)
tree: 679d68b4d491389b86c0d17c7a69516757266ecd /gcc
parent: ea133b14f48ed5730748a7e02e322fb07ccc2d85 (diff)
download: gcc-89d0345ad7b8d84045813972ee60557a6b511c57.zip
download: gcc-89d0345ad7b8d84045813972ee60557a6b511c57.tar.gz
download: gcc-89d0345ad7b8d84045813972ee60557a6b511c57.tar.bz2
Fix reductions for fully-masked loops
Now that vectorizable_operation vectorises most loop stmts involved
in a reduction, it needs to be aware of reductions in fully-masked loops.
The LOOP_VINFO_CAN_FULLY_MASK_P parts of vectorizable_reduction now only
apply to cases that use vect_transform_reduction.

This new way of doing things is definitely an improvement for SVE though,
since it means we can lift the old restriction of not using fully-masked
loops for reduction chains.

2019-10-25  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-loop.c (vectorizable_reduction): Restrict the
	LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be
	handled by vect_transform_reduction.  Allow fully-masked loops
	to be used with reduction chains.
	* tree-vect-stmts.c (vectorizable_operation): Handle reduction
	operations in fully-masked loops.
	(vectorizable_condition): Reject EXTRACT_LAST_REDUCTION
	operations in fully-masked loops.

gcc/testsuite/
	* gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines
	for FOLD_EXTRACT_LAST reductions.
	* gcc.dg/vect/pr65947-2.c: Likewise.
	* gcc.dg/vect/pr65947-3.c: Likewise.
	* gcc.dg/vect/pr65947-4.c: Likewise.
	* gcc.dg/vect/pr65947-5.c: Likewise.
	* gcc.dg/vect/pr65947-6.c: Likewise.
	* gcc.dg/vect/pr65947-9.c: Likewise.
	* gcc.dg/vect/pr65947-10.c: Likewise.
	* gcc.dg/vect/pr65947-12.c: Likewise.
	* gcc.dg/vect/pr65947-13.c: Likewise.
	* gcc.dg/vect/pr65947-14.c: Likewise.
	* gcc.dg/vect/pr80631-1.c: Likewise.
	* gcc.dg/vect/pr80631-2.c: Likewise.
	* gcc.dg/vect/vect-cond-reduc-3.c: Likewise.
	* gcc.dg/vect/vect-cond-reduc-4.c: Likewise.

From-SVN: r277438
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog  11
-rw-r--r--  gcc/testsuite/ChangeLog  19
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-1.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-10.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-12.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-13.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-14.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-2.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-3.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-4.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-5.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-6.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-9.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr80631-1.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr80631-2.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c  2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c  2
-rw-r--r--  gcc/tree-vect-loop.c  51
-rw-r--r--  gcc/tree-vect-stmts.c  93
19 files changed, 144 insertions, 60 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bb64e93..89448b1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2019-10-25 Richard Sandiford <richard.sandiford@arm.com>
+
+ * tree-vect-loop.c (vectorizable_reduction): Restrict the
+ LOOP_VINFO_CAN_FULLY_MASK_P handling to cases that will be
+ handled by vect_transform_reduction. Allow fully-masked loops
+ to be used with reduction chains.
+ * tree-vect-stmts.c (vectorizable_operation): Handle reduction
+ operations in fully-masked loops.
+ (vectorizable_condition): Reject EXTRACT_LAST_REDUCTION
+ operations in fully-masked loops.
+
2019-10-25 Richard Biener <rguenther@suse.de>
* tree-vect-loop.c (vectorizable_reduction): Verify
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8e652ab..4a98f6c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,22 @@
+2019-10-25 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.dg/vect/pr65947-1.c: No longer expect doubled dump lines
+ for FOLD_EXTRACT_LAST reductions.
+ * gcc.dg/vect/pr65947-2.c: Likewise.
+ * gcc.dg/vect/pr65947-3.c: Likewise.
+ * gcc.dg/vect/pr65947-4.c: Likewise.
+ * gcc.dg/vect/pr65947-5.c: Likewise.
+ * gcc.dg/vect/pr65947-6.c: Likewise.
+ * gcc.dg/vect/pr65947-9.c: Likewise.
+ * gcc.dg/vect/pr65947-10.c: Likewise.
+ * gcc.dg/vect/pr65947-12.c: Likewise.
+ * gcc.dg/vect/pr65947-13.c: Likewise.
+ * gcc.dg/vect/pr65947-14.c: Likewise.
+ * gcc.dg/vect/pr80631-1.c: Likewise.
+ * gcc.dg/vect/pr80631-2.c: Likewise.
+ * gcc.dg/vect/vect-cond-reduc-3.c: Likewise.
+ * gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
+
2019-10-24 Jakub Jelinek <jakub@redhat.com>
* c-c++-common/gomp/declare-variant-8.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index b81baed..8ebc385 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -41,5 +41,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index f37aeca..e4a1d94 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -42,6 +42,6 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
index b84fd41..a47f414 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -42,5 +42,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
index 4ad5262..b0755c0 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -42,4 +42,4 @@ main (void)
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
index d0194f2..c0df587 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
@@ -41,5 +41,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index 18d33c4..58ba5f7 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -42,5 +42,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index 427abdb..6b4077e 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -52,5 +52,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 4055710..99f9765 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -41,6 +41,6 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index c91b648..4e3f765 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -53,5 +53,5 @@ main (void)
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index b072c8d..dde96d7 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -41,5 +41,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
index e43e0e4..1f29530 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
@@ -48,5 +48,5 @@ main ()
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
index b531fe6..f430deb 100644
--- a/gcc/testsuite/gcc.dg/vect/pr80631-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
@@ -72,5 +72,5 @@ main ()
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
index 07f1a72..ca786f6 100644
--- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
@@ -73,4 +73,4 @@ main ()
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
index a5b3849..de9921c 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
@@ -40,6 +40,6 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
index 6b6d17f..543504f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
@@ -40,6 +40,6 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d0fd7bdb..3b58cee 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6319,38 +6319,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
else
vec_num = 1;
- internal_fn cond_fn = get_conditional_internal_fn (code);
- vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
- bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
-
vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
cost_vec);
- if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- {
- if (reduction_type != FOLD_LEFT_REDUCTION
- && !mask_by_cond_expr
- && (cond_fn == IFN_LAST
- || !direct_internal_fn_supported_p (cond_fn, vectype_in,
- OPTIMIZE_FOR_SPEED)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because no"
- " conditional operation is available.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- }
- else if (reduc_index == -1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop for chained"
- " reductions.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- }
- else
- vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
- vectype_in, NULL);
- }
if (dump_enabled_p ()
&& reduction_type == FOLD_LEFT_REDUCTION)
dump_printf_loc (MSG_NOTE, vect_location,
@@ -6367,6 +6337,27 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
}
+ else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ {
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ internal_fn cond_fn = get_conditional_internal_fn (code);
+
+ if (reduction_type != FOLD_LEFT_REDUCTION
+ && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in)
+ && (cond_fn == IFN_LAST
+ || !direct_internal_fn_supported_p (cond_fn, vectype_in,
+ OPTIMIZE_FOR_SPEED)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop because no"
+ " conditional operation is available.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
+ else
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype_in, NULL);
+ }
return true;
}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 02b95f51..19ac82f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5929,7 +5929,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
poly_uint64 nunits_in;
poly_uint64 nunits_out;
tree vectype_out;
- int ncopies;
+ int ncopies, vec_num;
int j, i;
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
@@ -6066,9 +6066,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node)
- ncopies = 1;
+ {
+ ncopies = 1;
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ }
else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ {
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ vec_num = 1;
+ }
gcc_assert (ncopies >= 1);
@@ -6121,8 +6127,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
return false;
}
+ int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
+ internal_fn cond_fn = get_conditional_internal_fn (code);
+
if (!vec_stmt) /* transformation not required. */
{
+ /* If this operation is part of a reduction, a fully-masked loop
+ should only change the active lanes of the reduction chain,
+ keeping the inactive lanes as-is. */
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && reduc_idx >= 0)
+ {
+ if (cond_fn == IFN_LAST
+ || !direct_internal_fn_supported_p (cond_fn, vectype,
+ OPTIMIZE_FOR_SPEED))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop because no"
+ " conditional operation is available.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
+ else
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype, NULL);
+ }
+
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
@@ -6135,6 +6167,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
dump_printf_loc (MSG_NOTE, vect_location,
"transform binary/unary operation.\n");
+ bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+
/* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
vectors with unsigned elements, but the result is signed. So, we
need to compute the MINUS_EXPR into vectype temporary and
@@ -6252,22 +6286,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
? vec_oprnds1[i] : NULL_TREE);
vop2 = ((op_type == ternary_op)
? vec_oprnds2[i] : NULL_TREE);
- gassign *new_stmt = gimple_build_assign (vec_dest, code,
- vop0, vop1, vop2);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- new_stmt_info
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- if (vec_cvt_dest)
+ if (masked_loop_p && reduc_idx >= 0)
{
- new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
- gassign *new_stmt
- = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
- new_temp);
- new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
+ /* Perform the operation on active elements only and take
+ inactive elements from the reduction chain input. */
+ gcc_assert (!vop2);
+ vop2 = reduc_idx == 1 ? vop1 : vop0;
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, i * ncopies + j);
+ gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
+ vop0, vop1, vop2);
+ new_temp = make_ssa_name (vec_dest, call);
+ gimple_call_set_lhs (call, new_temp);
+ gimple_call_set_nothrow (call, true);
+ new_stmt_info
+ = vect_finish_stmt_generation (stmt_info, call, gsi);
+ }
+ else
+ {
+ gassign *new_stmt = gimple_build_assign (vec_dest, code,
+ vop0, vop1, vop2);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
new_stmt_info
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ if (vec_cvt_dest)
+ {
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
+ gassign *new_stmt
+ = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
+ new_temp);
+ new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ new_stmt_info
+ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ }
}
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
@@ -9997,6 +10050,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
return false;
}
}
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
+ && reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't yet use a fully-masked loop for"
+ " EXTRACT_LAST_REDUCTION.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
if (expand_vec_cond_expr_p (vectype, comp_vectype,
cond_code))
{