author     Richard Biener <rguenther@suse.de>    2024-10-04 11:13:58 +0200
committer  Richard Biener <rguenth@gcc.gnu.org>  2024-10-05 13:59:32 +0200
commit     515f015f3cc4978b8b02bb61ba50ba67d2a24065 (patch)
tree       d1c15f375c5c65e6cc29f2fb39e98d24cfadaaca /gcc
parent     7d736ecbc05a35f73fbd8e3b010d6e9821c34404 (diff)
Improve load permutation lowering
The following makes sure the emitted even/odd extraction scheme
is one that ends up with actual trivial even/odd extract permutes.
When we choose a level 2 extract we generate { 0, 1, 4, 5, ... },
which, for example, the x86 backend does not recognize with just SSE
and QImode elements.  So this now follows what the non-SLP
interleaving code would do, which is element-granular even/odd
extracts.
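As a standalone illustration (hand-written for this note, not GCC code;
the 8-lane group size and the even_extract helper are assumptions of
the example), the two extraction shapes compare as follows:

#include <cstdio>
#include <initializer_list>
#include <vector>

/* Lane indices selected by an even extract at the given level.  */
static std::vector<unsigned>
even_extract (unsigned group_lanes, unsigned level)
{
  std::vector<unsigned> perm;
  for (unsigned i = 0; i < group_lanes / 2 / level; ++i)
    for (unsigned j = 0; j < level; ++j)
      perm.push_back (2 * i * level + j);
  return perm;
}

int main ()
{
  for (unsigned level : { 2u, 1u })
    {
      printf ("level %u:", level);
      for (unsigned idx : even_extract (8, level))
        printf (" %u", idx);
      printf ("\n");
    }
  /* Prints:
       level 2: 0 1 4 5   <- pair-granular, not a trivial even permute
       level 1: 0 2 4 6   <- element-granular even extract  */
  return 0;
}

The level 1 sequence is the classic even extract targets can match
directly; the level 2 sequence only looks like one after punning to a
wider component mode, which the permute code generation does not do.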
This resolves gcc.dg/vect/vect-strided[-a]-u8-i8-gap*.c FAILs with
--param vect-force-slp=1 on x86_64.
* tree-vect-slp.cc (vect_lower_load_permutations): Prefer
level 1 even/odd extracts.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/tree-vect-slp.cc | 31
1 file changed, 17 insertions, 14 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 482b9d5..2274d0e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4426,25 +4426,28 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
       /* Now build an even or odd extraction from the unpermuted load.  */
       lane_permutation_t perm;
       perm.create ((group_lanes + 1) / 2);
-      unsigned level;
-      if (even
-          && ((level = 1 << ctz_hwi (even)), true)
-          && group_lanes % (2 * level) == 0)
+      unsigned even_level = even ? 1 << ctz_hwi (even) : 0;
+      unsigned odd_level = odd ? 1 << ctz_hwi (odd) : 0;
+      if (even_level
+          && group_lanes % (2 * even_level) == 0
+          /* ??? When code generating permutes we do not try to pun
+             to larger component modes so level != 1 isn't a natural
+             even/odd extract.  Prefer one if possible.  */
+          && (even_level == 1 || !odd_level || odd_level != 1))
         {
           /* { 0, 1, ... 4, 5 ..., } */
-          unsigned level = 1 << ctz_hwi (even);
-          for (unsigned i = 0; i < group_lanes / 2 / level; ++i)
-            for (unsigned j = 0; j < level; ++j)
-              perm.quick_push (std::make_pair (0, 2 * i * level + j));
+          for (unsigned i = 0; i < group_lanes / 2 / even_level; ++i)
+            for (unsigned j = 0; j < even_level; ++j)
+              perm.quick_push (std::make_pair (0, 2 * i * even_level + j));
         }
-      else if (odd)
+      else if (odd_level)
         {
           /* { ..., 2, 3, ... 6, 7 } */
-          unsigned level = 1 << ctz_hwi (odd);
-          gcc_assert (group_lanes % (2 * level) == 0);
-          for (unsigned i = 0; i < group_lanes / 2 / level; ++i)
-            for (unsigned j = 0; j < level; ++j)
-              perm.quick_push (std::make_pair (0, (2 * i + 1) * level + j));
+          gcc_assert (group_lanes % (2 * odd_level) == 0);
+          for (unsigned i = 0; i < group_lanes / 2 / odd_level; ++i)
+            for (unsigned j = 0; j < odd_level; ++j)
+              perm.quick_push
+                (std::make_pair (0, (2 * i + 1) * odd_level + j));
         }
       else
         {
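For reference, a hedged standalone sketch of the new selection logic
(the function name and driver values are made up for illustration;
GCC's ctz_hwi is replaced by the __builtin_ctz builtin here, and this
is not the in-tree implementation itself):

#include <cstdio>
#include <utility>
#include <vector>

/* Build the lane permutation the patched code would choose,
   preferring an element-granular (level 1) extract when available.  */
static std::vector<std::pair<unsigned, unsigned> >
choose_extract (unsigned group_lanes, unsigned even, unsigned odd)
{
  std::vector<std::pair<unsigned, unsigned> > perm;
  unsigned even_level = even ? 1u << __builtin_ctz (even) : 0;
  unsigned odd_level = odd ? 1u << __builtin_ctz (odd) : 0;
  if (even_level
      && group_lanes % (2 * even_level) == 0
      /* Take a level != 1 even extract only when the odd side does
         not offer a level 1 extract instead.  */
      && (even_level == 1 || !odd_level || odd_level != 1))
    for (unsigned i = 0; i < group_lanes / 2 / even_level; ++i)
      for (unsigned j = 0; j < even_level; ++j)
        perm.push_back (std::make_pair (0, 2 * i * even_level + j));
  else if (odd_level)
    for (unsigned i = 0; i < group_lanes / 2 / odd_level; ++i)
      for (unsigned j = 0; j < odd_level; ++j)
        perm.push_back (std::make_pair (0, (2 * i + 1) * odd_level + j));
  return perm;
}

int main ()
{
  /* even == 2 offers a level 2 even extract, odd == 1 a level 1 odd
     extract; the new preference picks the odd side.  */
  for (auto p : choose_extract (8, 2, 1))
    printf (" %u", p.second);
  printf ("\n");   /* prints " 1 3 5 7" instead of " 0 1 4 5" */
  return 0;
}

Under the old logic the even side would have won here, yielding the
{ 0, 1, 4, 5 } permute the commit message describes as unrecognizable
for the x86 backend with just SSE and QImode elements.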