author     Richard Sandiford <richard.sandiford@linaro.org>  2018-01-13 17:57:57 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>          2018-01-13 17:57:57 +0000
commit     7e11fc7f5cecffe650b672ac1af212d4bd9f1335 (patch)
tree       bdbe6dcf4a77cd6837eba958b6c08cef87276a2f /gcc/tree-vect-stmts.c
parent     abc8eb9a45654662092ce1b6d452c13ee80be954 (diff)
Add support for masked load/store_lanes
This patch adds support for vectorising groups of IFN_MASK_LOADs
and IFN_MASK_STOREs using conditional load/store-lanes instructions.
This requires new internal functions to represent the result
(IFN_MASK_{LOAD,STORE}_LANES), as well as associated optabs.
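As a concrete illustration (a hedged sketch, not one of the new tests; the function and variable names are invented), the kind of loop this targets is a conditional access to an interleaved group, which on SVE can now be handled by a predicated LD2 instead of falling back to elementwise accesses:

/* Hypothetical example: each active lane reads the interleaved pair
   src[2*i] and src[2*i + 1], so the masked loads form a group of two
   that a conditional load-lanes (LD2) instruction can cover.  */
void
sum_pairs (int *restrict dest, const int *restrict src,
           const int *restrict cond, int n)
{
  for (int i = 0; i < n; ++i)
    if (cond[i])
      dest[i] = src[i * 2] + src[i * 2 + 1];
}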
The normal IFN_{LOAD,STORE}_LANES functions are const operations
that logically just perform the permute: the load or store is
encoded as a MEM operand to the call statement. In contrast,
the IFN_MASK_{LOAD,STORE}_LANES functions use the same kind of
interface as IFN_MASK_{LOAD,STORE}, since the memory is only
conditionally accessed.
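Sketching the two call shapes side by side (the operand names DATAREF_PTR, ALIAS_PTR, VEC_MASK and VEC_ARRAY follow the comments added in the patch below; this is illustrative gimple rather than literal compiler output):

  /* Unmasked: the access itself is the MEM_REF operand of a const call.  */
  VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]);
  MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY);

  /* Masked: the data pointer, alias pointer and mask are explicit
     arguments, matching IFN_MASK_LOAD/IFN_MASK_STORE, since the memory
     is only conditionally accessed.  */
  VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK);
  MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, VEC_ARRAY);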
The AArch64 patterns were added as part of the main LD[234]/ST[234] patch.
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/md.texi (vec_mask_load_lanes@var{m}@var{n}): Document.
(vec_mask_store_lanes@var{m}@var{n}): Likewise.
* optabs.def (vec_mask_load_lanes_optab): New optab.
(vec_mask_store_lanes_optab): Likewise.
* internal-fn.def (MASK_LOAD_LANES): New internal function.
(MASK_STORE_LANES): Likewise.
* internal-fn.c (mask_load_lanes_direct): New macro.
(mask_store_lanes_direct): Likewise.
(expand_mask_load_optab_fn): Handle masked operations.
(expand_mask_load_lanes_optab_fn): New macro.
(expand_mask_store_optab_fn): Handle masked operations.
(expand_mask_store_lanes_optab_fn): New macro.
(direct_mask_load_lanes_optab_supported_p): Likewise.
(direct_mask_store_lanes_optab_supported_p): Likewise.
* tree-vectorizer.h (vect_store_lanes_supported): Take a masked_p
parameter.
(vect_load_lanes_supported): Likewise.
* tree-vect-data-refs.c (strip_conversion): New function.
(can_group_stmts_p): Likewise.
(vect_analyze_data_ref_accesses): Use it instead of checking
for a pair of assignments.
(vect_store_lanes_supported): Take a masked_p parameter.
(vect_load_lanes_supported): Likewise.
* tree-vect-loop.c (vect_analyze_loop_2): Update calls to
vect_store_lanes_supported and vect_load_lanes_supported.
* tree-vect-slp.c (vect_analyze_slp_instance): Likewise.
* tree-vect-stmts.c (get_group_load_store_type): Take a masked_p
parameter. Don't allow gaps for masked accesses.
Use vect_get_store_rhs. Update calls to vect_store_lanes_supported
and vect_load_lanes_supported.
(get_load_store_type): Take a masked_p parameter and update
call to get_group_load_store_type.
(vectorizable_store): Update call to get_load_store_type.
Handle IFN_MASK_STORE_LANES.
(vectorizable_load): Update call to get_load_store_type.
Handle IFN_MASK_LOAD_LANES.
gcc/testsuite/
* gcc.dg/vect/vect-ooo-group-1.c: New test.
* gcc.target/aarch64/sve/mask_struct_load_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_1_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_2_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_6.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_8.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_1_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_2_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_3_run.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_store_4.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256620
Diffstat (limited to 'gcc/tree-vect-stmts.c')
 -rw-r--r--  gcc/tree-vect-stmts.c  96
 1 file changed, 67 insertions, 29 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 50b35fc..d9d747a 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1757,7 +1757,7 @@ vect_get_store_rhs (gimple *stmt)
 
 static bool
 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
-                           vec_load_store_type vls_type,
+                           bool masked_p, vec_load_store_type vls_type,
                            vect_memory_access_type *memory_access_type)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -1778,7 +1778,10 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
 
   /* True if we can cope with such overrun by peeling for gaps, so that
      there is at least one final scalar iteration after the vector loop.  */
-  bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
+  bool can_overrun_p = (!masked_p
+                        && vls_type == VLS_LOAD
+                        && loop_vinfo
+                        && !loop->inner);
 
   /* There can only be a gap at the end of the group if the stride is
      known at compile time.  */
@@ -1841,6 +1844,7 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
             and so we are guaranteed to access a non-gap element in the
             same B-sized block.  */
          if (would_overrun_p
+             && !masked_p
              && gap < (vect_known_alignment_in_bytes (first_dr)
                        / vect_get_scalar_dr_size (first_dr)))
            would_overrun_p = false;
@@ -1857,8 +1861,9 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
       /* Otherwise try using LOAD/STORE_LANES.  */
       if (*memory_access_type == VMAT_ELEMENTWISE
          && (vls_type == VLS_LOAD
-             ? vect_load_lanes_supported (vectype, group_size)
-             : vect_store_lanes_supported (vectype, group_size)))
+             ? vect_load_lanes_supported (vectype, group_size, masked_p)
+             : vect_store_lanes_supported (vectype, group_size,
+                                           masked_p)))
        {
          *memory_access_type = VMAT_LOAD_STORE_LANES;
          overrun_p = would_overrun_p;
@@ -1884,8 +1889,7 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
       gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
       while (next_stmt)
        {
-         gcc_assert (gimple_assign_single_p (next_stmt));
-         tree op = gimple_assign_rhs1 (next_stmt);
+         tree op = vect_get_store_rhs (next_stmt);
          gimple *def_stmt;
          enum vect_def_type dt;
          if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
@@ -1969,11 +1973,12 @@ get_negative_load_store_type (gimple *stmt, tree vectype,
    or scatters, fill in GS_INFO accordingly.
 
    SLP says whether we're performing SLP rather than loop vectorization.
+   MASKED_P is true if the statement is conditional on a vectorized mask.
    VECTYPE is the vector type that the vectorized statements will use.
    NCOPIES is the number of vector statements that will be needed.  */
 
 static bool
-get_load_store_type (gimple *stmt, tree vectype, bool slp,
+get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
                      vec_load_store_type vls_type, unsigned int ncopies,
                      vect_memory_access_type *memory_access_type,
                      gather_scatter_info *gs_info)
@@ -2001,7 +2006,7 @@ get_load_store_type (gimple *stmt, tree vectype, bool slp,
     }
   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
-      if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
+      if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
                                       memory_access_type))
        return false;
     }
@@ -5762,23 +5767,26 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     return false;
 
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
+  if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
                             &memory_access_type, &gs_info))
     return false;
 
   if (mask)
     {
-      if (memory_access_type != VMAT_CONTIGUOUS)
+      if (memory_access_type == VMAT_CONTIGUOUS)
+       {
+         if (!VECTOR_MODE_P (vec_mode)
+             || !can_vec_mask_load_store_p (vec_mode,
+                                            TYPE_MODE (mask_vectype), false))
+           return false;
+       }
+      else if (memory_access_type != VMAT_LOAD_STORE_LANES)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported access type for masked store.\n");
          return false;
        }
-      if (!VECTOR_MODE_P (vec_mode)
-         || !can_vec_mask_load_store_p (vec_mode, TYPE_MODE (mask_vectype),
-                                        false))
-       return false;
     }
   else
     {
@@ -6421,12 +6429,27 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }
 
-         /* Emit:
-              MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
-         data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
-         gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
-                                                   vec_array);
-         gimple_call_set_lhs (call, data_ref);
+         gcall *call;
+         if (mask)
+           {
+             /* Emit:
+                  MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
+                                    VEC_ARRAY).  */
+             unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
+             tree alias_ptr = build_int_cst (ref_type, align);
+             call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
+                                                dataref_ptr, alias_ptr,
+                                                vec_mask, vec_array);
+           }
+         else
+           {
+             /* Emit:
+                  MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
+             data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
+             call = gimple_build_call_internal (IFN_STORE_LANES, 1,
+                                                vec_array);
+             gimple_call_set_lhs (call, data_ref);
+           }
          gimple_call_set_nothrow (call, true);
          new_stmt = call;
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -6870,7 +6893,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     }
 
   vect_memory_access_type memory_access_type;
-  if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
+  if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
                             &memory_access_type, &gs_info))
     return false;
 
@@ -6878,8 +6901,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     {
       if (memory_access_type == VMAT_CONTIGUOUS)
        {
-         if (!VECTOR_MODE_P (TYPE_MODE (vectype))
-             || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
+         machine_mode vec_mode = TYPE_MODE (vectype);
+         if (!VECTOR_MODE_P (vec_mode)
+             || !can_vec_mask_load_store_p (vec_mode,
                                             TYPE_MODE (mask_vectype), true))
            return false;
        }
@@ -6897,7 +6921,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
            return false;
        }
     }
-  else
+  else if (memory_access_type != VMAT_LOAD_STORE_LANES)
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7447,11 +7471,25 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
          vec_array = create_vector_array (vectype, vec_num);
 
-         /* Emit:
-              VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
-         data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
-         gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
-                                                   data_ref);
+         gcall *call;
+         if (mask)
+           {
+             /* Emit:
+                  VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
+                                               VEC_MASK).  */
+             unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
+             tree alias_ptr = build_int_cst (ref_type, align);
+             call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
+                                                dataref_ptr, alias_ptr,
+                                                vec_mask);
+           }
+         else
+           {
+             /* Emit:
+                  VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
+             data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
+             call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+           }
          gimple_call_set_lhs (call, vec_array);
          gimple_call_set_nothrow (call, true);
          new_stmt = call;