diff options
author | Richard Biener <rguenther@suse.de> | 2016-01-18 14:25:56 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2016-01-18 14:25:56 +0000 |
commit | b555a2e4c36b415edb82d8e6d31b8735c62d2bfb (patch) | |
tree | 3865217f6caa55bd7700da18c5f76c5d1d7935ba /gcc | |
parent | 305708cedd962831b648783936cb6991dfdeb87d (diff) | |
download | gcc-b555a2e4c36b415edb82d8e6d31b8735c62d2bfb.zip gcc-b555a2e4c36b415edb82d8e6d31b8735c62d2bfb.tar.gz gcc-b555a2e4c36b415edb82d8e6d31b8735c62d2bfb.tar.bz2 |
re PR tree-optimization/69297 (Performance regression after r230020)
2016-01-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/69297
* tree-vect-slp.c (vect_bb_slp_scalar_cost): Count each scalar
stmt at most once.
(vect_bb_vectorization_profitable_p): Clear visited flag again.
* gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c: New testcase.
From-SVN: r232519
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c | 83 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 10 |
4 files changed, 105 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f5bd86f..10ad835 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-01-18 Richard Biener <rguenther@suse.de>
+
+	PR tree-optimization/69297
+	* tree-vect-slp.c (vect_bb_slp_scalar_cost): Count each scalar
+	stmt at most once.
+	(vect_bb_vectorization_profitable_p): Clear visited flag again.
+
 2016-01-18 Yuri Rumyantsev <ysrumyan@gmail.com>
 
 	PR middle-end/68542
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 050535f..cfc6ce2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-01-18 Richard Biener <rguenther@suse.de>
+
+	PR tree-optimization/69297
+	* gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c: New testcase.
+
 2016-01-18 Joseph Myers <joseph@codesourcery.com>
 
 	* gcc.target/mips/mips-3d-1.c: Use forbid_cpu=octeon.* in
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c
new file mode 100644
index 0000000..e65a30c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=core-avx2 -fdump-tree-slp-details" } */
+
+#define abs(x) (x) < 0 ? -(x) : (x)
+int
+foo (int* diff)
+{
+  int k, satd = 0, m[16], d[16];
+
+  m[ 0] = diff[ 0] + diff[12];
+  m[ 4] = diff[ 4] + diff[ 8];
+  m[ 8] = diff[ 4] - diff[ 8];
+  m[12] = diff[ 0] - diff[12];
+  m[ 1] = diff[ 1] + diff[13];
+  m[ 5] = diff[ 5] + diff[ 9];
+  m[ 9] = diff[ 5] - diff[ 9];
+  m[13] = diff[ 1] - diff[13];
+  m[ 2] = diff[ 2] + diff[14];
+  m[ 6] = diff[ 6] + diff[10];
+  m[10] = diff[ 6] - diff[10];
+  m[14] = diff[ 2] - diff[14];
+  m[ 3] = diff[ 3] + diff[15];
+  m[ 7] = diff[ 7] + diff[11];
+  m[11] = diff[ 7] - diff[11];
+  m[15] = diff[ 3] - diff[15];
+
+  d[ 0] = m[ 0] + m[ 4];
+  d[ 8] = m[ 0] - m[ 4];
+  d[ 4] = m[ 8] + m[12];
+  d[12] = m[12] - m[ 8];
+  d[ 1] = m[ 1] + m[ 5];
+  d[ 9] = m[ 1] - m[ 5];
+  d[ 5] = m[ 9] + m[13];
+  d[13] = m[13] - m[ 9];
+  d[ 2] = m[ 2] + m[ 6];
+  d[10] = m[ 2] - m[ 6];
+  d[ 6] = m[10] + m[14];
+  d[14] = m[14] - m[10];
+  d[ 3] = m[ 3] + m[ 7];
+  d[11] = m[ 3] - m[ 7];
+  d[ 7] = m[11] + m[15];
+  d[15] = m[15] - m[11];
+
+  m[ 0] = d[ 0] + d[ 3];
+  m[ 1] = d[ 1] + d[ 2];
+  m[ 2] = d[ 1] - d[ 2];
+  m[ 3] = d[ 0] - d[ 3];
+  m[ 4] = d[ 4] + d[ 7];
+  m[ 5] = d[ 5] + d[ 6];
+  m[ 6] = d[ 5] - d[ 6];
+  m[ 7] = d[ 4] - d[ 7];
+  m[ 8] = d[ 8] + d[11];
+  m[ 9] = d[ 9] + d[10];
+  m[10] = d[ 9] - d[10];
+  m[11] = d[ 8] - d[11];
+  m[12] = d[12] + d[15];
+  m[13] = d[13] + d[14];
+  m[14] = d[13] - d[14];
+  m[15] = d[12] - d[15];
+
+  d[ 0] = m[ 0] + m[ 1];
+  d[ 1] = m[ 0] - m[ 1];
+  d[ 2] = m[ 2] + m[ 3];
+  d[ 3] = m[ 3] - m[ 2];
+  d[ 4] = m[ 4] + m[ 5];
+  d[ 5] = m[ 4] - m[ 5];
+  d[ 6] = m[ 6] + m[ 7];
+  d[ 7] = m[ 7] - m[ 6];
+  d[ 8] = m[ 8] + m[ 9];
+  d[ 9] = m[ 8] - m[ 9];
+  d[10] = m[10] + m[11];
+  d[11] = m[11] - m[10];
+  d[12] = m[12] + m[13];
+  d[13] = m[12] - m[13];
+  d[14] = m[14] + m[15];
+  d[15] = m[15] - m[14];
+  for (k=0; k<16; k++)
+    satd += abs(d[k]);
+  return satd;
+}
+
+/* { dg-final { scan-tree-dump "vectorization is not profitable" "slp1" } } */
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d5bd92e..30e8c15 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2409,6 +2409,11 @@ vect_bb_slp_scalar_cost (basic_block bb,
       if ((*life)[i])
         continue;
 
+      /* Count scalar stmts only once. */
+      if (gimple_visited_p (stmt))
+        continue;
+      gimple_set_visited (stmt, true);
+
       stmt_info = vinfo_for_stmt (stmt);
       if (STMT_VINFO_DATA_REF (stmt_info))
         {
@@ -2451,6 +2456,11 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
                                               &life);
     }
 
+  /* Unset visited flag. */
+  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
+       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
+    gimple_set_visited (gsi_stmt (gsi), false);
+
   /* Complete the target-specific cost calculation. */
   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
                &vec_inside_cost, &vec_epilogue_cost);