diff options
author | Richard Biener <rguenther@suse.de> | 2024-09-20 12:17:22 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2024-09-20 14:49:23 +0200 |
commit | 664e0ce580a8f20a78aa355c42222e4647841f77 (patch) | |
tree | 2387aeb7144cced396bf39ac47bd94c3496cc29b /gcc | |
parent | 4eba48a684b1a1f77aa18b29a1ae58dbdc842b5b (diff) | |
download | gcc-664e0ce580a8f20a78aa355c42222e4647841f77.zip gcc-664e0ce580a8f20a78aa355c42222e4647841f77.tar.gz gcc-664e0ce580a8f20a78aa355c42222e4647841f77.tar.bz2 |
Fall back to elementwise access for too widely spaced SLP single-element interleaving
gcc.dg/vect/vect-pr111779.c is a case where non-SLP manages to vectorize
using VMAT_ELEMENTWISE but SLP currently refuses because doing a regular
access with permutes would cause excess vector loads with at most one
element used. The following makes us fall back to elementwise accesses
for that, too.
* tree-vect-stmts.cc (get_group_load_store_type): Fall back
to VMAT_ELEMENTWISE for single-element interleaving of
a too large group.
(vectorizable_load): Do not try to verify load permutations
when using VMAT_ELEMENTWISE for single-lane SLP and fix code
generation for this case.
* gfortran.dg/vect/vect-8.f90: Allow one more vectorized loop.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/testsuite/gfortran.dg/vect/vect-8.f90 | 2 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.cc | 37 |
2 files changed, 23 insertions, 16 deletions
```diff
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
index 2a3fa90..918edde 100644
--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
@@ -708,5 +708,5 @@ END SUBROUTINE kernel
 
 ! { dg-final { scan-tree-dump-times "vectorized 2\[56\] loops" 1 "vect" { target aarch64_sve } } }
 ! { dg-final { scan-tree-dump-times "vectorized 2\[45\] loops" 1 "vect" { target { aarch64*-*-* && { ! aarch64_sve } } } } }
-! { dg-final { scan-tree-dump-times "vectorized 2\[345\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
+! { dg-final { scan-tree-dump-times "vectorized 2\[3456\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
 ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33cdcca..45003f7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2190,11 +2190,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                   && single_element_p
                   && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
                 {
+                  *memory_access_type = VMAT_ELEMENTWISE;
                   if (dump_enabled_p ())
                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                      "single-element interleaving not supported "
-                                     "for not adjacent vector loads\n");
-                  return false;
+                                     "for not adjacent vector loads, using "
+                                     "elementwise access\n");
                 }
             }
         }
@@ -10039,7 +10040,23 @@ vectorizable_load (vec_info *vinfo,
   else
     group_size = 1;
 
-  if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+  vect_memory_access_type memory_access_type;
+  enum dr_alignment_support alignment_support_scheme;
+  int misalignment;
+  poly_int64 poffset;
+  internal_fn lanes_ifn;
+  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
+                            ncopies, &memory_access_type, &poffset,
+                            &alignment_support_scheme, &misalignment, &gs_info,
+                            &lanes_ifn))
+    return false;
+
+  /* ??? The following checks should really be part of
+     get_group_load_store_type. */
+  if (slp
+      && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+      && !(memory_access_type == VMAT_ELEMENTWISE
+           && SLP_TREE_LANES (slp_node) == 1))
     {
       slp_perm = true;
 
@@ -10079,17 +10096,6 @@ vectorizable_load (vec_info *vinfo,
         }
     }
 
-  vect_memory_access_type memory_access_type;
-  enum dr_alignment_support alignment_support_scheme;
-  int misalignment;
-  poly_int64 poffset;
-  internal_fn lanes_ifn;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
-                            ncopies, &memory_access_type, &poffset,
-                            &alignment_support_scheme, &misalignment, &gs_info,
-                            &lanes_ifn))
-    return false;
-
   if (slp_node
       && slp_node->ldst_lanes
       && memory_access_type != VMAT_LOAD_STORE_LANES)
@@ -10292,7 +10298,8 @@ vectorizable_load (vec_info *vinfo,
           first_dr_info = dr_info;
         }
 
-      if (slp && grouped_load)
+      if (slp && grouped_load
+          && memory_access_type == VMAT_STRIDED_SLP)
         {
           group_size = DR_GROUP_SIZE (first_stmt_info);
           ref_type = get_group_alias_ptr_type (first_stmt_info);
```