about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2016-06-08 13:17:41 +0000
committer: Richard Biener <rguenth@gcc.gnu.org> 2016-06-08 13:17:41 +0000
commit e09b4c37aca7a12184b3bbd1692601765769fb1b (patch)
tree 59061fe2da254d2d451ceb452cf97d5736dd533b /gcc
parent 72d50660ad57224cefc7ad40b81c994b49a503b6 (diff)
download: gcc-e09b4c37aca7a12184b3bbd1692601765769fb1b.zip
gcc-e09b4c37aca7a12184b3bbd1692601765769fb1b.tar.gz
gcc-e09b4c37aca7a12184b3bbd1692601765769fb1b.tar.bz2
tree-vect-stmts.c (vectorizable_load): Remove restrictions on strided SLP loads and fall back to scalar loads in case...
2016-06-08 Richard Biener <rguenther@suse.de> * tree-vect-stmts.c (vectorizable_load): Remove restrictions on strided SLP loads and fall back to scalar loads in case we can't chunk them. * gcc.dg/vect/slp-43.c: New testcase. From-SVN: r237215
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog6
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-43.c78
-rw-r--r--gcc/tree-vect-stmts.c94
4 files changed, 134 insertions, 48 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4f5d317..474b064 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
2016-06-08 Richard Biener <rguenther@suse.de>
+ * tree-vect-stmts.c (vectorizable_load): Remove restrictions
+ on strided SLP loads and fall back to scalar loads in case
+ we can't chunk them.
+
+2016-06-08 Richard Biener <rguenther@suse.de>
+
PR tree-optimization/71452
* tree-ssa.c (non_rewritable_lvalue_p): Make sure that the
type used for the SSA rewrite has enough precision to cover
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 28b17cf..1d80915 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
2016-06-08 Richard Biener <rguenther@suse.de>
+ * gcc.dg/vect/slp-43.c: New testcase.
+
+2016-06-08 Richard Biener <rguenther@suse.de>
+
PR tree-optimization/71452
* gcc.dg/torture/pr71452.c: New testcase.
diff --git a/gcc/testsuite/gcc.dg/vect/slp-43.c b/gcc/testsuite/gcc.dg/vect/slp-43.c
new file mode 100644
index 0000000..4e8df46
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-43.c
@@ -0,0 +1,78 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+#include <string.h>
+#include "tree-vect.h"
+
+#define FOO(T,N) \
+void __attribute__((noinline,noclone)) \
+foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
+{ \
+ T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
+ T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
+ for (int i = 0; i < 16; i++) \
+ { \
+ for (int j = 0; j < N; ++j) \
+ out[j] = in[j]; \
+ in += s*N; \
+ out += N; \
+ } \
+}
+
+#define TEST(T,N) \
+ do { \
+ memset (out, 0, 4096); \
+ foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
+ if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+ __builtin_abort (); \
+ for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+ if (out[i] != 0) \
+ __builtin_abort (); \
+ } while (0)
+
+FOO(char, 1)
+FOO(char, 2)
+FOO(char, 3)
+FOO(char, 4)
+FOO(char, 6)
+FOO(char, 8)
+FOO(int, 1)
+FOO(int, 2)
+FOO(int, 3)
+FOO(int, 4)
+FOO(int, 6)
+FOO(int, 8)
+FOO(int, 16)
+
+char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+
+int main()
+{
+ check_vect ();
+
+ for (int i = 0; i < 4096; ++i)
+ {
+ in[i] = i;
+ __asm__ volatile ("" : : : "memory");
+ }
+
+ TEST(char, 1);
+ TEST(char, 2);
+ TEST(char, 3);
+ TEST(char, 4);
+ TEST(char, 6);
+ TEST(char, 8);
+ TEST(int, 1);
+ TEST(int, 2);
+ TEST(int, 3);
+ TEST(int, 4);
+ TEST(int, 6);
+ TEST(int, 8);
+ TEST(int, 16);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 373ecd7..bee064e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6440,17 +6440,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
}
else if (STMT_VINFO_STRIDED_P (stmt_info))
- {
- if (grouped_load
- && slp
- && (group_size > nunits
- || nunits % group_size != 0))
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unhandled strided group load\n");
- return false;
- }
- }
+ ;
else
{
negative = tree_int_cst_compare (nested_in_vect_loop
@@ -6744,16 +6734,29 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
running_off = offvar;
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
int nloads = nunits;
+ int lnel = 1;
tree ltype = TREE_TYPE (vectype);
auto_vec<tree> dr_chain;
if (slp)
{
- nloads = nunits / group_size;
- if (group_size < nunits)
- ltype = build_vector_type (TREE_TYPE (vectype), group_size);
- else
- ltype = vectype;
- ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+ if (group_size < nunits
+ && nunits % group_size == 0)
+ {
+ nloads = nunits / group_size;
+ lnel = group_size;
+ ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+ ltype = build_aligned_type (ltype,
+ TYPE_ALIGN (TREE_TYPE (vectype)));
+ }
+ else if (group_size >= nunits
+ && group_size % nunits == 0)
+ {
+ nloads = 1;
+ lnel = nunits;
+ ltype = vectype;
+ ltype = build_aligned_type (ltype,
+ TYPE_ALIGN (TREE_TYPE (vectype)));
+ }
/* For SLP permutation support we need to load the whole group,
not only the number of vector stmts the permutation result
fits in. */
@@ -6765,48 +6768,43 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
else
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
+ int group_el = 0;
+ unsigned HOST_WIDE_INT
+ elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
for (j = 0; j < ncopies; j++)
{
- tree vec_inv;
-
if (nloads > 1)
+ vec_alloc (v, nloads);
+ for (i = 0; i < nloads; i++)
{
- vec_alloc (v, nloads);
- for (i = 0; i < nloads; i++)
+ tree this_off = build_int_cst (TREE_TYPE (alias_off),
+ group_el * elsz);
+ new_stmt = gimple_build_assign (make_ssa_name (ltype),
+ build2 (MEM_REF, ltype,
+ running_off, this_off));
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (nloads > 1)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ gimple_assign_lhs (new_stmt));
+
+ group_el += lnel;
+ if (! slp
+ || group_el == group_size)
{
- tree newref, newoff;
- gimple *incr;
- newref = build2 (MEM_REF, ltype, running_off, alias_off);
-
- newref = force_gimple_operand_gsi (gsi, newref, true,
- NULL_TREE, true,
- GSI_SAME_STMT);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
- newoff = copy_ssa_name (running_off);
- incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
- running_off, stride_step);
+ tree newoff = copy_ssa_name (running_off);
+ gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+ running_off, stride_step);
vect_finish_stmt_generation (stmt, incr, gsi);
running_off = newoff;
+ group_el = 0;
}
-
- vec_inv = build_constructor (vectype, v);
- new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
- new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
- else
+ if (nloads > 1)
{
- new_stmt = gimple_build_assign (make_ssa_name (ltype),
- build2 (MEM_REF, ltype,
- running_off, alias_off));
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
- tree newoff = copy_ssa_name (running_off);
- gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
- running_off, stride_step);
- vect_finish_stmt_generation (stmt, incr, gsi);
-
- running_off = newoff;
+ tree vec_inv = build_constructor (vectype, v);
+ new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
if (slp)