aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Kewen Lin <linkw@linux.ibm.com> 2020-03-31 22:48:46 -0500
committer: Kewen Lin <linkw@linux.ibm.com> 2020-04-01 00:21:14 -0500
commit: bd0f22a8d5caea8905f38ff1fafce31c1b7d33ad (patch)
tree: ce0c6d209c9fb224c2bbb80c96bb2b50ae1cdd66 /gcc
parent: 76f09260b7eccd6c3cfa3dcf3c22897fe12a8065 (diff)
downloadgcc-bd0f22a8d5caea8905f38ff1fafce31c1b7d33ad.zip
gcc-bd0f22a8d5caea8905f38ff1fafce31c1b7d33ad.tar.gz
gcc-bd0f22a8d5caea8905f38ff1fafce31c1b7d33ad.tar.bz2
Fix PR94043 by making vect_live_op generate lc-phi
As PR94043 shows, my commit r10-4524 exposed one issue in vectorizable_live_operation, which inserts one extra BB before the single exit, leading to unexpected operand expansion and an unexpected loop depth assertion.

As Richi suggested, this patch teaches vectorizable_live_operation to generate a loop-closed phi for vec_lhs. It looks like:

    loop;
    # lhs' = PHI <lhs>
  =>
    loop;
    # vec_lhs' = PHI <vec_lhs>
    new_tree = BIT_FIELD_REF <vec_lhs', ...>;
    lhs' = new_tree;

I noticed that there are some SLP cases that have the same lhs and vec_lhs but different offsets, which can make us have more PHIs for the same vec_lhs there. But I think it would be fine since only one of them is actually live; the others should be eliminated by the following DCE. So the patch doesn't check whether there is already one phi for vec_lhs; it just creates one directly instead.

Bootstrapped/regtested on powerpc64le-linux-gnu (LE) P8.

2020-04-01 Kewen Lin <linkw@gcc.gnu.org>

gcc/ChangeLog
    PR tree-optimization/94043
    * tree-vect-loop.c (vectorizable_live_operation): Generate loop-closed phi for vec_lhs and use it for lane extraction.

gcc/testsuite/ChangeLog
    PR tree-optimization/94043
    * gfortran.dg/graphite/vect-pr94043.f90: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gfortran.dg/graphite/vect-pr94043.f90 18
-rw-r--r-- gcc/tree-vect-loop.c 50
4 files changed, 73 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5bd72cc..fc836b8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-01 Kewen Lin <linkw@gcc.gnu.org>
+
+ PR tree-optimization/94043
+ * tree-vect-loop.c (vectorizable_live_operation): Generate loop-closed
+ phi for vec_lhs and use it for lane extraction.
+
2020-03-31 Felix Yang <felix.yang@huawei.com>
PR tree-optimization/94398
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index adfbdb0..6f41ffb 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-04-01 Kewen Lin <linkw@gcc.gnu.org>
+
+ PR tree-optimization/94043
+ * gfortran.dg/graphite/vect-pr94043.f90: New test.
+
2020-04-01 Iain Buclaw <ibuclaw@gdcproject.org>
PR d/90136
diff --git a/gcc/testsuite/gfortran.dg/graphite/vect-pr94043.f90 b/gcc/testsuite/gfortran.dg/graphite/vect-pr94043.f90
new file mode 100644
index 0000000..744c0f3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/vect-pr94043.f90
@@ -0,0 +1,18 @@
+! { dg-do compile }
+! { dg-additional-options "-O3 -ftree-parallelize-loops=2 -fno-tree-dce" }
+
+! As PR94043, test it to be compiled successfully without ICE.
+
+program yw
+ integer :: hx(6, 6)
+ integer :: ps = 1, e2 = 1
+
+ do ps = 1, 6
+ do e2 = 1, 6
+ hx(e2, ps) = 0
+ if (ps >= 5 .and. e2 >= 5) then
+ hx(e2, ps) = hx(1, 1)
+ end if
+ end do
+ end do
+end program
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 73e092c..c9b6534 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7998,6 +7998,25 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
}
+ /* Ensure the VEC_LHS for lane extraction stmts satisfy loop-closed PHI
+ requirement, insert one phi node for it. It looks like:
+ loop;
+ BB:
+ # lhs' = PHI <lhs>
+ ==>
+ loop;
+ BB:
+ # vec_lhs' = PHI <vec_lhs>
+ new_tree = lane_extract <vec_lhs', ...>;
+ lhs' = new_tree; */
+
+ basic_block exit_bb = single_exit (loop)->dest;
+ gcc_assert (single_pred_p (exit_bb));
+
+ tree vec_lhs_phi = copy_ssa_name (vec_lhs);
+ gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
+ SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, vec_lhs);
+
gimple_seq stmts = NULL;
tree new_tree;
if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
@@ -8010,10 +8029,10 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
the loop mask for the final iteration. */
gcc_assert (ncopies == 1 && !slp_node);
tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
- tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
- 1, vectype, 0);
- tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST,
- scalar_type, mask, vec_lhs);
+ tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo), 1,
+ vectype, 0);
+ tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type,
+ mask, vec_lhs_phi);
/* Convert the extracted vector element to the required scalar type. */
new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
@@ -8023,13 +8042,32 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
tree bftype = TREE_TYPE (vectype);
if (VECTOR_BOOLEAN_TYPE_P (vectype))
bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
- new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
+ new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs_phi, bitsize, bitstart);
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
}
if (stmts)
- gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);
+ {
+ gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
+ gsi_insert_before (&exit_gsi, stmts, GSI_CONTINUE_LINKING);
+
+ /* Remove existing phi from lhs and create one copy from new_tree. */
+ tree lhs_phi = NULL_TREE;
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *phi = gsi_stmt (gsi);
+ if ((gimple_phi_arg_def (phi, 0) == lhs))
+ {
+ remove_phi_node (&gsi, false);
+ lhs_phi = gimple_phi_result (phi);
+ gimple *copy = gimple_build_assign (lhs_phi, new_tree);
+ gsi_insert_after (&exit_gsi, copy, GSI_CONTINUE_LINKING);
+ break;
+ }
+ }
+ }
/* Replace use of lhs with newly computed result. If the use stmt is a
single arg PHI, just replace all uses of PHI result. It's necessary