vect: Fix an ICE in exact_div [PR95961]

In the test case for PR95961, the vectorization factor computed by vect_determine_vectorization_factor is [8,8], but this is later updated to [1,1] by vect_update_vf_for_slp. When we call vect_get_num_vectors in vect_enhance_data_refs_alignment, the number of scalars, which is based on the vectorization factor, is not a multiple of the number of elements in the vector type. This leads to the ICE. This isn't a simple stream of contiguous vector accesses. It's hard to predict from the available information how many vector accesses we'll actually need per iteration. As discussed, here we should use the number of scalars instead of the number of vectors as an upper bound for the loop saving info about DR in the hash table. 2020-07-02 Felix Yang <felix.yang@huawei.com> gcc/ PR tree-optimization/95961 * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Use the number of scalars instead of the number of vectors as an upper bound for the loop saving info about DR in the hash table. Remove unused local variables. gcc/testsuite/ PR tree-optimization/95961 * gcc.target/aarch64/sve/pr95961.c: New test.
author: Fei Yang <felix.yang@huawei.com> 2020-07-02 10:14:33 +0100
committer: Richard Sandiford <richard.sandiford@arm.com> 2020-07-02 10:14:33 +0100
commit: 5c9669a0e6cbf477a03024522943197bdb2682d4 (patch)
tree: 388ec5f43bebd7b754ac9d9df6874cd912a34c31 /gcc
parent: 9d50112acfc01f85fe0fb6d88b329e6122e817b3 (diff)
download: gcc-5c9669a0e6cbf477a03024522943197bdb2682d4.zip
gcc-5c9669a0e6cbf477a03024522943197bdb2682d4.tar.gz
gcc-5c9669a0e6cbf477a03024522943197bdb2682d4.tar.bz2
2 files changed, 22 insertions, 17 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c b/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c
new file mode 100644
index 0000000..b9802c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr95961.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+sve -fno-vect-cost-model" } */
+
+typedef struct {
+    unsigned short mprr_2[5][16][16];
+} ImageParameters;
+int s[16][2];
+void intrapred_luma_16x16(ImageParameters *img, int s0)
+{
+  for (int j=0; j < 16; j++)
+    for (int i=0; i < 16; i++)
+      {
+	img->mprr_2[1 ][j][i]=s[j][1];
+	img->mprr_2[2 ][j][i]=s0;
+      }
+}
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index eb8288e..2462276 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1722,7 +1722,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   dr_vec_info *first_store = NULL;
   dr_vec_info *dr0_info = NULL;
   struct data_reference *dr;
-  unsigned int i, j;
+  unsigned int i;
   bool do_peeling = false;
   bool do_versioning = false;
   unsigned int npeel = 0;
@@ -1730,9 +1730,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   bool one_misalignment_unknown = false;
   bool one_dr_unsupportable = false;
   dr_vec_info *unsupportable_dr_info = NULL;
-  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  unsigned possible_npeel_number = 1;
-  tree vectype;
   unsigned int mis, same_align_drs_max = 0;
   hash_table<peel_info_hasher> peeling_htab (1);
 
@@ -1792,7 +1789,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 	      bool negative = tree_int_cst_compare (DR_STEP (dr),
 						    size_zero_node) < 0;
 
-	      vectype = STMT_VINFO_VECTYPE (stmt_info);
 	      /* If known_alignment_for_access_p then we have set
 	         DR_MISALIGNMENT which is only done if we know it at compiler
 	         time, so it is safe to assume target alignment is constant.
@@ -1819,22 +1815,17 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  vectorization factor.
                  We do this automatically for cost model, since we calculate
 		 cost for every peeling option.  */
+	      poly_uint64 nscalars = npeel_tmp;
               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
 		{
-		  poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
-					  ? vf * DR_GROUP_SIZE (stmt_info) : vf);
-		  possible_npeel_number
-		    = vect_get_num_vectors (nscalars, vectype);
-
-		  /* NPEEL_TMP is 0 when there is no misalignment, but also
-		     allow peeling NELEMENTS.  */
-		  if (DR_MISALIGNMENT (dr_info) == 0)
-		    possible_npeel_number++;
+		  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+		  nscalars = (STMT_SLP_TYPE (stmt_info)
+			      ? vf * DR_GROUP_SIZE (stmt_info) : vf);
 		}
 
 	      /* Save info about DR in the hash table.  Also include peeling
 	         amounts according to the explanation above.  */
-              for (j = 0; j < possible_npeel_number; j++)
+	      while (known_le (npeel_tmp, nscalars))
                 {
                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
 					    dr_info, npeel_tmp);
@@ -2059,8 +2050,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   if (do_peeling)
     {
       stmt_vec_info stmt_info = dr0_info->stmt;
-      vectype = STMT_VINFO_VECTYPE (stmt_info);
-
       if (known_alignment_for_access_p (dr0_info))
         {
 	  bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
author	Fei Yang <felix.yang@huawei.com>	2020-07-02 10:14:33 +0100
committer	Richard Sandiford <richard.sandiford@arm.com>	2020-07-02 10:14:33 +0100
commit	5c9669a0e6cbf477a03024522943197bdb2682d4 (patch)
tree	388ec5f43bebd7b754ac9d9df6874cd912a34c31 /gcc
parent	9d50112acfc01f85fe0fb6d88b329e6122e817b3 (diff)
download	gcc-5c9669a0e6cbf477a03024522943197bdb2682d4.zip gcc-5c9669a0e6cbf477a03024522943197bdb2682d4.tar.gz gcc-5c9669a0e6cbf477a03024522943197bdb2682d4.tar.bz2