aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2023-11-06 12:43:11 +0100
committer Richard Biener <rguenther@suse.de> 2023-11-06 14:06:01 +0100
commit 3cc9ad41db87fb85b13a56bff1f930c258542a70 (patch)
tree 84ce7033686d67f9fc8ec4de56ce4613a42a227f
parent 9d1bf1d0b7a14ef741e967e0ab3ead35ec8e5f4e (diff)
download gcc-3cc9ad41db87fb85b13a56bff1f930c258542a70.zip
gcc-3cc9ad41db87fb85b13a56bff1f930c258542a70.tar.gz
gcc-3cc9ad41db87fb85b13a56bff1f930c258542a70.tar.bz2
tree-optimization/112404 - two issues with SLP of .MASK_LOAD

The following fixes an oversight in vect_check_scalar_mask when
the mask is external or constant.  When doing BB vectorization
we need to provide a group_size, best via an overload accepting
the SLP node as argument.

When fixed we then run into the issue that we have not analyzed
alignment of the .MASK_LOADs because they were not identified
as loads by vect_gather_slp_loads.  Fixed by reworking the
detection.

	PR tree-optimization/112404
	* tree-vectorizer.h (get_mask_type_for_scalar_type): Declare
	overload with SLP node argument.
	* tree-vect-stmts.cc (get_mask_type_for_scalar_type): Implement it.
	(vect_check_scalar_mask): Use it.
	* tree-vect-slp.cc (vect_gather_slp_loads): Properly identify
	loads also for nodes with children, like .MASK_LOAD.
	* tree-vect-loop.cc (vect_analyze_loop_2): Look at the
	representative for load nodes and check whether it is a grouped
	access before looking for load-lanes support.

	* gfortran.dg/pr112404.f90: New testcase.

-rw-r--r--  gcc/testsuite/gfortran.dg/pr112404.f90  23
-rw-r--r--  gcc/tree-vect-loop.cc                   47
-rw-r--r--  gcc/tree-vect-slp.cc                    23
-rw-r--r--  gcc/tree-vect-stmts.cc                  22
-rw-r--r--  gcc/tree-vectorizer.h                    1
5 files changed, 82 insertions, 34 deletions
diff --git a/gcc/testsuite/gfortran.dg/pr112404.f90 b/gcc/testsuite/gfortran.dg/pr112404.f90
new file mode 100644
index 0000000..573fa28
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr112404.f90
@@ -0,0 +1,23 @@
+! { dg-do compile }
+! { dg-options "-Ofast" }
+! { dg-additional-options "-mavx2" { target avx2 } }
+ SUBROUTINE sfddagd( regime, znt, ite, jte )
+ REAL, DIMENSION( ime, IN) :: regime, znt
+ REAL, DIMENSION( ite, jte) :: wndcor_u
+ LOGICAL wrf_dm_on_monitor
+ IF( int4 == 1 ) THEN
+ DO j=jts,jtf
+ DO i=itsu,itf
+ reg = regime(i-1, j)
+ IF( reg > 10.0 ) THEN
+ znt0 = znt(i-1, j) + znt(i, j)
+ IF( znt0 <= 0.2) THEN
+ wndcor_u(i,j) = 0.2
+ ENDIF
+ ENDIF
+ ENDDO
+ ENDDO
+ IF ( wrf_dm_on_monitor()) THEN
+ ENDIF
+ ENDIF
+ END
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 362856a..5213aa0 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2943,17 +2943,19 @@ start_over:
!= IFN_LAST)
{
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
- {
- stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
- (SLP_TREE_SCALAR_STMTS (load_node)[0]);
- /* Use SLP for strided accesses (or if we can't
- load-lanes). */
- if (STMT_VINFO_STRIDED_P (stmt_vinfo)
- || vect_load_lanes_supported
- (STMT_VINFO_VECTYPE (stmt_vinfo),
- DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
- break;
- }
+ if (STMT_VINFO_GROUPED_ACCESS
+ (SLP_TREE_REPRESENTATIVE (load_node)))
+ {
+ stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+ (SLP_TREE_REPRESENTATIVE (load_node));
+ /* Use SLP for strided accesses (or if we can't
+ load-lanes). */
+ if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+ || vect_load_lanes_supported
+ (STMT_VINFO_VECTYPE (stmt_vinfo),
+ DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
+ break;
+ }
can_use_lanes
= can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
@@ -3261,16 +3263,19 @@ again:
"unsupported grouped store\n");
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
{
- vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
- vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
- bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
- size = DR_GROUP_SIZE (vinfo);
- vectype = STMT_VINFO_VECTYPE (vinfo);
- if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
- && ! vect_grouped_load_supported (vectype, single_element_p,
- size))
- return opt_result::failure_at (vinfo->stmt,
- "unsupported grouped load\n");
+ vinfo = SLP_TREE_REPRESENTATIVE (node);
+ if (STMT_VINFO_GROUPED_ACCESS (vinfo))
+ {
+ vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
+ bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
+ size = DR_GROUP_SIZE (vinfo);
+ vectype = STMT_VINFO_VECTYPE (vinfo);
+ if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
+ && ! vect_grouped_load_supported (vectype, single_element_p,
+ size))
+ return opt_result::failure_at (vinfo->stmt,
+ "unsupported grouped load\n");
+ }
}
}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6b8a7b6..13137ed 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2898,22 +2898,21 @@ vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
if (!node || visited.add (node))
return;
- if (SLP_TREE_CHILDREN (node).length () == 0)
+ if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+ return;
+
+ if (SLP_TREE_CODE (node) != VEC_PERM_EXPR)
{
- if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
- return;
- stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
+ if (STMT_VINFO_DATA_REF (stmt_info)
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
loads.safe_push (node);
}
- else
- {
- unsigned i;
- slp_tree child;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_gather_slp_loads (loads, child, visited);
- }
+
+ unsigned i;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ vect_gather_slp_loads (loads, child, visited);
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f895aaf..eefb1ee 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2456,7 +2456,8 @@ vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
if (!mask_vectype)
- mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
+ mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
+ mask_node_1);
if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
{
@@ -13386,6 +13387,25 @@ get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
return truth_type_for (vectype);
}
+/* Function get_mask_type_for_scalar_type.
+
+ Returns the mask type corresponding to a result of comparison
+ of vectors of specified SCALAR_TYPE as supported by target.
+ NODE, if nonnull, is the SLP tree node that will use the returned
+ vector type. */
+
+tree
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
+ slp_tree node)
+{
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
+
+ if (!vectype)
+ return NULL;
+
+ return truth_type_for (vectype);
+}
+
/* Function get_same_sized_vectype
Returns a vector type corresponding to SCALAR_TYPE of size
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 254d172..d2ddc2e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2207,6 +2207,7 @@ extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
+extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree);
extern tree get_same_sized_vectype (tree, tree);
extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
extern bool vect_get_loop_mask_type (loop_vec_info);