tree-optimization/114375 - disallow SLP discovery of permuted mask loads

We cannot currently handle permutations of masked loads in code generation
or permute optimization.  Until now any such permutation was simply dropped
on the floor; the following instead rejects the SLP build rather than
producing wrong code.  For completeness I've also made sure to reject them
in vectorizable_load.

	PR tree-optimization/114375
	* tree-vect-slp.cc (vect_build_slp_tree_2): Compute the load
	permutation for masked loads but reject it when any such is
	necessary.
	* tree-vect-stmts.cc (vectorizable_load): Reject masked
	VMAT_ELEMENTWISE and VMAT_STRIDED_SLP as those are not supported.

	* gcc.dg/vect/vect-pr114375.c: New testcase.
author: Richard Biener <rguenther@suse.de> 2024-03-18 12:39:03 +0100
committer: Richard Biener <rguenther@suse.de> 2024-03-19 09:02:17 +0100
commit: 94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a (patch)
tree: 79ae6732f7725837190cbf831ebb096c045dd0bb /gcc
parent: 9eeca7753670d7bccd82e6ed7e4fe97cabd9a362 (diff)
download: gcc-94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a.zip
gcc-94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a.tar.gz
gcc-94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a.tar.bz2
3 files changed, 81 insertions, 10 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
new file mode 100644
index 0000000..1e1cb01
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+
+void __attribute__((noipa))  /* noipa: keep interprocedural optimization (e.g. inlining into main) away from foo.  */
+foo(int * __restrict p)  /* PR114375 kernel: conditional loads from p feed interleaved stores to b and c.  */
+{
+  for (int i = 0; i < 64; ++i)  /* Each iteration covers a[4*i .. 4*i+3], p[4*i .. 4*i+3], b/c[2*i .. 2*i+1].  */
+    {
+      int tem = 2, tem2 = 2;  /* 2 is the value stored when a guarding condition is false.  */
+      if (a[4*i + 1])  /* Guard and load index differ: a[4*i + 1] guards p[4*i].  */
+        tem = p[4*i];  /* Conditional load; presumably if-converted to a mask load (see commit message).  */
+      if (a[4*i])  /* a[4*i] guards p[4*i + 2].  */
+        tem2 = p[4*i + 2];
+      b[2*i] = tem2;  /* b receives the p[4*i + 2] value first ...  */
+      b[2*i+1] = tem;  /* ... then the p[4*i] value, i.e. not in memory order of p.  */
+      if (a[4*i + 2])  /* tem and tem2 keep their earlier values when these guards fail.  */
+        tem = p[4*i + 1];
+      if (a[4*i + 3])
+        tem2 = p[4*i + 3];
+      c[2*i] = tem2;  /* c receives the p[4*i + 3] value first ...  */
+      c[2*i+1] = tem;  /* ... then the p[4*i + 1] value, again swapped relative to p.  */
+    }
+}
+int main()  /* Runtime driver: fill a, run foo, abort if the first results are wrong.  */
+{
+  check_vect ();  /* Helper expected from tree-vect.h; its definition is not part of this file.  */
+
+  for (int i = 0; i < 512; ++i)
+    a[i] = (i >> 1) & 1;  /* a = 0,0,1,1,0,0,1,1,... so guards fail for a[4*i], a[4*i+1] and pass for a[4*i+2], a[4*i+3].  */
+
+  foo (a);  /* p and the guard array are the same object; foo only reads it.  */
+
+  if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0  /* Expect c[0]=a[3], c[1]=a[1], c[2]=a[7], c[3]=a[5].  */
+      || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)  /* Guards for b are all zero here, so b keeps the default 2.  */
+    abort ();  /* Any mismatch is reported as a test failure via abort.  */
+
+  return 0;
+}
+
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 527b06c..23f9593 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1921,12 +1921,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (STMT_VINFO_DATA_REF (stmt_info)
       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     {
-      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-	gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
-		    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-		    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
-		    || gimple_call_internal_p (stmt, IFN_MASK_LEN_GATHER_LOAD));
-      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
 	gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
 	{
@@ -1943,19 +1938,43 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	  load_permutation.create (group_size);
 	  stmt_vec_info first_stmt_info
 	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+	  bool any_permute = false;
 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
 	    {
 	      int load_place;
 	      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 		load_place = vect_get_place_in_interleaving_chain
-				(load_info, first_stmt_info);
+		    (load_info, first_stmt_info);
 	      else
 		load_place = 0;
 	      gcc_assert (load_place != -1);
-	      load_permutation.safe_push (load_place);
+	      any_permute |= load_place != j;
+	      load_permutation.quick_push (load_place);
+	    }
+
+	  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+	    {
+	      gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+			  || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+			  || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
+			  || gimple_call_internal_p (stmt,
+						     IFN_MASK_LEN_GATHER_LOAD));
+	      load_permutation.release ();
+	      /* We cannot handle permuted masked loads, see PR114375.  */
+	      if (any_permute
+		  || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+		      && DR_GROUP_SIZE (first_stmt_info) != group_size)
+		  || STMT_VINFO_STRIDED_P (stmt_info))
+		{
+		  matches[0] = false;
+		  return NULL;
+		}
+	    }
+	  else
+	    {
+	      SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+	      return node;
 	    }
-	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
-	  return node;
 	}
     }
   else if (gimple_assign_single_p (stmt_info->stmt)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index e861743..5a4eb13 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10080,6 +10080,14 @@ vectorizable_load (vec_info *vinfo,
 			     "unsupported masked emulated gather.\n");
 	  return false;
 	}
+      else if (memory_access_type == VMAT_ELEMENTWISE
+	       || memory_access_type == VMAT_STRIDED_SLP)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "unsupported masked strided access.\n");
+	  return false;
+	}
     }
 
   bool costing_p = !vec_stmt;
author	Richard Biener <rguenther@suse.de>	2024-03-18 12:39:03 +0100
committer	Richard Biener <rguenther@suse.de>	2024-03-19 09:02:17 +0100
commit	94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a (patch)
tree	79ae6732f7725837190cbf831ebb096c045dd0bb /gcc
parent	9eeca7753670d7bccd82e6ed7e4fe97cabd9a362 (diff)
download	gcc-94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a.zip gcc-94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a.tar.gz gcc-94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a.tar.bz2