author	Richard Biener <rguenther@suse.de>	2024-09-20 12:17:22 +0200
committer	Richard Biener <rguenth@gcc.gnu.org>	2024-09-20 14:49:23 +0200
commit	664e0ce580a8f20a78aa355c42222e4647841f77 (patch)
tree	2387aeb7144cced396bf39ac47bd94c3496cc29b /gcc
parent	4eba48a684b1a1f77aa18b29a1ae58dbdc842b5b (diff)
download	gcc-664e0ce580a8f20a78aa355c42222e4647841f77.zip
	gcc-664e0ce580a8f20a78aa355c42222e4647841f77.tar.gz
	gcc-664e0ce580a8f20a78aa355c42222e4647841f77.tar.bz2
Fall back to elementwise access for too spaced SLP single element interleaving
gcc.dg/vect/vect-pr111779.c is a case where non-SLP manages to vectorize
using VMAT_ELEMENTWISE but SLP currently refuses because doing a regular
access with permutes would cause excess vector loads with at most one
element used.  The following makes us fall back to elementwise accesses
for that case, too.

	* tree-vect-stmts.cc (get_group_load_store_type): Fall back
	to VMAT_ELEMENTWISE for single element interleaving of a too
	large group.
	(vectorizable_load): Do not try to verify load permutations
	when using VMAT_ELEMENTWISE for single-lane SLP and fix code
	generation for this case.

	* gfortran.dg/vect/vect-8.f90: Allow one more vectorized loop.
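As an illustration only (a minimal sketch, not the actual
gcc.dg/vect/vect-pr111779.c; the names a, b, foo, the size N and the
stride 8 are made up), the affected pattern is a single-lane SLP load
whose constant stride spans more elements than fit in one vector, so a
regular vector load plus permute would use at most one lane per load:

/* Hypothetical example: a[i * 8] is a single-element interleaving
   access with group size 8.  When 8 exceeds TYPE_VECTOR_SUBPARTS of
   the chosen vector type, loading whole vectors and permuting would
   waste nearly every lane; with this change the vectorizer instead
   falls back to VMAT_ELEMENTWISE, i.e. scalar loads assembled into
   vectors, rather than refusing to vectorize the loop.  */

#define N 128

double a[N * 8];
double b[N];

void
foo (void)
{
  for (int i = 0; i < N; i++)
    b[i] = a[i * 8] + 1.0;
}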
Diffstat (limited to 'gcc')
-rw-r--r--	gcc/testsuite/gfortran.dg/vect/vect-8.f90	2
-rw-r--r--	gcc/tree-vect-stmts.cc	37
2 files changed, 23 insertions, 16 deletions
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
index 2a3fa90..918edde 100644
--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
@@ -708,5 +708,5 @@ END SUBROUTINE kernel
! { dg-final { scan-tree-dump-times "vectorized 2\[56\] loops" 1 "vect" { target aarch64_sve } } }
! { dg-final { scan-tree-dump-times "vectorized 2\[45\] loops" 1 "vect" { target { aarch64*-*-* && { ! aarch64_sve } } } } }
-! { dg-final { scan-tree-dump-times "vectorized 2\[345\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
+! { dg-final { scan-tree-dump-times "vectorized 2\[3456\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33cdcca..45003f7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2190,11 +2190,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
&& single_element_p
&& maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
{
+ *memory_access_type = VMAT_ELEMENTWISE;
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"single-element interleaving not supported "
- "for not adjacent vector loads\n");
- return false;
+ "for not adjacent vector loads, using "
+ "elementwise access\n");
}
}
}
@@ -10039,7 +10040,23 @@ vectorizable_load (vec_info *vinfo,
else
group_size = 1;
- if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+ vect_memory_access_type memory_access_type;
+ enum dr_alignment_support alignment_support_scheme;
+ int misalignment;
+ poly_int64 poffset;
+ internal_fn lanes_ifn;
+ if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
+ ncopies, &memory_access_type, &poffset,
+ &alignment_support_scheme, &misalignment, &gs_info,
+ &lanes_ifn))
+ return false;
+
+ /* ??? The following checks should really be part of
+ get_group_load_store_type. */
+ if (slp
+ && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+ && !(memory_access_type == VMAT_ELEMENTWISE
+ && SLP_TREE_LANES (slp_node) == 1))
{
slp_perm = true;
@@ -10079,17 +10096,6 @@ vectorizable_load (vec_info *vinfo,
}
}
- vect_memory_access_type memory_access_type;
- enum dr_alignment_support alignment_support_scheme;
- int misalignment;
- poly_int64 poffset;
- internal_fn lanes_ifn;
- if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
- ncopies, &memory_access_type, &poffset,
- &alignment_support_scheme, &misalignment, &gs_info,
- &lanes_ifn))
- return false;
-
if (slp_node
&& slp_node->ldst_lanes
&& memory_access_type != VMAT_LOAD_STORE_LANES)
@@ -10292,7 +10298,8 @@ vectorizable_load (vec_info *vinfo,
first_dr_info = dr_info;
}
- if (slp && grouped_load)
+ if (slp && grouped_load
+ && memory_access_type == VMAT_STRIDED_SLP)
{
group_size = DR_GROUP_SIZE (first_stmt_info);
ref_type = get_group_alias_ptr_type (first_stmt_info);