author		Richard Sandiford <richard.sandiford@arm.com>	2019-11-11 19:43:52 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2019-11-11 19:43:52 +0000
commit		599bd99078439b9f11cb271aa919844318381ec5 (patch)
tree		43e915fe087ef94d41149983ad0b8fba1b8547ff
parent		98aad294705ac35aec058e020867d736d841dad0 (diff)
Fix SLP downward group access classification (PR92420)
This PR was caused by the SLP handling in get_group_load_store_type
returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for
downward groups.
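To make the failure mode concrete, the access pattern at issue looks like the loop below, distilled from the new testcase added by this patch (the function name here is illustrative). Each iteration reads the two-field group {r, i} from y, and the y[-1 - i] indexing means consecutive iterations touch groups at decreasing addresses, i.e. a contiguous access with a negative step:

struct C { int r, i; };

/* Complex multiplication in which the second operand is read
   backwards: the grouped loads from y walk downward through memory,
   so the vectorizer must load contiguously and then reverse the
   elements (VMAT_CONTIGUOUS_REVERSE) rather than load them as-is
   (VMAT_CONTIGUOUS).  */
static void
downward_group (struct C *restrict x, struct C *restrict y,
		struct C *restrict z, int n)
{
  for (int i = 0; i < n; i++)
    {
      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
    }
}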
A more elaborate fix would be to try to combine the reverse permutation
into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on
optimisation and not backport material. It also might not be a win
if the target supports (say) reversing and odd/even swaps as
independent permutes but doesn't recognise the combined form.
2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/92420
	* tree-vect-stmts.c (get_negative_load_store_type): Move further
	up file.
	(get_group_load_store_type): Use it for reversed SLP accesses.

gcc/testsuite/
	* gcc.dg/vect/pr92420.c: New test.
From-SVN: r278064
-rw-r--r--	gcc/ChangeLog				7
-rw-r--r--	gcc/testsuite/ChangeLog			4
-rw-r--r--	gcc/testsuite/gcc.dg/vect/pr92420.c	48
-rw-r--r--	gcc/tree-vect-stmts.c			110
4 files changed, 118 insertions(+), 51 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 72d1e3d..329d107 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>
+
+	PR tree-optimization/92420
+	* tree-vect-stmts.c (get_negative_load_store_type): Move further
+	up file.
+	(get_group_load_store_type): Use it for reversed SLP accesses.
+
 2019-11-11  Jan Hubicka  <hubcika@ucw.cz>
 
 	* ipa-prop.c (ipa_propagate_indirect_call_infos): Remove ipcp
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e734954..88088fa 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-11-11  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gcc.dg/vect/pr92420.c: New test.
+
 2019-11-11  Claudiu Zissulescu  <claziss@gmail.com>
 
 	* gcc.target/arc/delay-slot-limm.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/pr92420.c b/gcc/testsuite/gcc.dg/vect/pr92420.c
new file mode 100644
index 0000000..e43539f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr92420.c
@@ -0,0 +1,48 @@
+/* { dg-additional-options "-mavx2" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#define N 16
+struct C { int r, i; };
+struct C a[N], b[N], c[N], d[N], e[N];
+
+__attribute__((noipa)) static void
+foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
+{
+  int i;
+  for (int i = 0; i < w; i++)
+    {
+      z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i;
+      z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i;
+    }
+}
+
+__attribute__((noipa)) static void
+bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w)
+{
+  int i;
+  for (int i = 0; i < w; i++)
+    {
+      z[i].r = x[i].r * y[i].r - x[i].i * y[i].i;
+      z[i].i = x[i].i * y[i].r + x[i].r * y[i].i;
+    }
+}
+
+int
+main ()
+{
+  check_vect ();
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      a[i].r = N - i; a[i].i = i - N;
+      b[i].r = i - N; b[i].i = i + N;
+      c[i].r = -1 - i; c[i].i = 2 * N - 1 - i;
+    }
+  foo (a, b + N, d, N);
+  bar (a, c, e, N);
+  for (i = 0; i < N; ++i)
+    if (d[i].r != e[i].r || d[i].i != e[i].i)
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index eaca014..1da949a 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2147,6 +2147,56 @@ perm_mask_for_reverse (tree vectype)
   return vect_gen_perm_mask_checked (vectype, indices);
 }
 
+/* A subroutine of get_load_store_type, with a subset of the same
+   arguments.  Handle the case where STMT_INFO is a load or store that
+   accesses consecutive elements with a negative step.  */
+
+static vect_memory_access_type
+get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
+			      vec_load_store_type vls_type,
+			      unsigned int ncopies)
+{
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+  dr_alignment_support alignment_support_scheme;
+
+  if (ncopies > 1)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "multiple types with negative step.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
+  if (alignment_support_scheme != dr_aligned
+      && alignment_support_scheme != dr_unaligned_supported)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step but alignment required.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  if (vls_type == VLS_STORE_INVARIANT)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "negative step with invariant source;"
+			 " no permute needed.\n");
+      return VMAT_CONTIGUOUS_DOWN;
+    }
+
+  if (!perm_mask_for_reverse (vectype))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "negative step and reversing not supported.\n");
+      return VMAT_ELEMENTWISE;
+    }
+
+  return VMAT_CONTIGUOUS_REVERSE;
+}
+
 /* STMT_INFO is either a masked or unconditional store.  Return the
    value being stored.  */
 
@@ -2273,7 +2323,15 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 				 "Peeling for outer loop is not supported\n");
 	      return false;
 	    }
-	  *memory_access_type = VMAT_CONTIGUOUS;
+	  int cmp = compare_step_with_zero (stmt_info);
+	  if (cmp < 0)
+	    *memory_access_type = get_negative_load_store_type
+	      (stmt_info, vectype, vls_type, 1);
+	  else
+	    {
+	      gcc_assert (!loop_vinfo || cmp > 0);
+	      *memory_access_type = VMAT_CONTIGUOUS;
+	    }
 	}
     }
   else
@@ -2375,56 +2433,6 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
   return true;
 }
 
-/* A subroutine of get_load_store_type, with a subset of the same
-   arguments.  Handle the case where STMT_INFO is a load or store that
-   accesses consecutive elements with a negative step.  */
-
-static vect_memory_access_type
-get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
-			      vec_load_store_type vls_type,
-			      unsigned int ncopies)
-{
-  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
-  dr_alignment_support alignment_support_scheme;
-
-  if (ncopies > 1)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "multiple types with negative step.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
-  if (alignment_support_scheme != dr_aligned
-      && alignment_support_scheme != dr_unaligned_supported)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "negative step but alignment required.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  if (vls_type == VLS_STORE_INVARIANT)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_NOTE, vect_location,
-			 "negative step with invariant source;"
-			 " no permute needed.\n");
-      return VMAT_CONTIGUOUS_DOWN;
-    }
-
-  if (!perm_mask_for_reverse (vectype))
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "negative step and reversing not supported.\n");
-      return VMAT_ELEMENTWISE;
-    }
-
-  return VMAT_CONTIGUOUS_REVERSE;
-}
-
 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
    if there is a memory access type that the vectorized form can use,
    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
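As a rough illustration of what the VMAT_CONTIGUOUS_REVERSE classification means for code generation, the sketch below uses GCC's generic vector extensions to emulate the sequence the vectorizer emits: one contiguous vector load followed by the element-reversing permute that perm_mask_for_reverse checks for. This is an editor's sketch, not code from the patch; load_reversed and the fixed v4si type are assumptions for the example, whereas the vectorizer builds the real mask per target via vect_gen_perm_mask_checked.

typedef int v4si __attribute__ ((vector_size (16)));

/* Load four contiguous ints from P and return them in reversed
   element order: the vector equivalent of reading the scalars at
   decreasing addresses.  */
static v4si
load_reversed (const int *p)
{
  v4si v;
  __builtin_memcpy (&v, p, sizeof v);	/* contiguous (unaligned-safe) load */
  v4si mask = { 3, 2, 1, 0 };		/* reverse permutation selector */
  return __builtin_shuffle (v, mask);
}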