diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2017-05-05 21:27:54 +0000 |
---|---|---|
committer | William Schmidt <wschmidt@gcc.gnu.org> | 2017-05-05 21:27:54 +0000 |
commit | 9945596cefaa45d13ecab76b4d97ad021bc3a872 (patch) | |
tree | 2f004c595c0519caa3260985eb7778f48e61ede4 /gcc | |
parent | ba82e6b5c83bd38a14ef50de8d7f92d6314fe2b3 (diff) | |
download | gcc-9945596cefaa45d13ecab76b4d97ad021bc3a872.zip gcc-9945596cefaa45d13ecab76b4d97ad021bc3a872.tar.gz gcc-9945596cefaa45d13ecab76b4d97ad021bc3a872.tar.bz2 |
rs6000.c (rs6000_vect_nonmem): New static var.
[gcc]
2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
(rs6000_init_cost): Initialize rs6000_vect_nonmem.
(rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
(rs6000_finish_cost): Avoid vectorizing simple copy loops with
VF=2 that require versioning.
[gcc/testsuite]
2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/versioned-copy-loop.c: New file.
From-SVN: r247671
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 |
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 27 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 |
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c | 30 |
4 files changed, 69 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aeaa27d..a2f57ac 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var. + (rs6000_init_cost): Initialize rs6000_vect_nonmem. + (rs6000_add_stmt_cost): Update rs6000_vect_nonmem. + (rs6000_finish_cost): Avoid vectorizing simple copy loops with + VF=2 that require versioning. + 2017-05-05 David Malcolm <dmalcolm@redhat.com> * diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8f68d84..bac56ab 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5873,6 +5873,10 @@ rs6000_density_test (rs6000_cost_data *data) /* Implement targetm.vectorize.init_cost. */ +/* For each vectorized loop, this var holds TRUE iff a non-memory vector + instruction is needed by the vectorization. */ +static bool rs6000_vect_nonmem; + static void * rs6000_init_cost (struct loop *loop_info) { @@ -5881,6 +5885,7 @@ rs6000_init_cost (struct loop *loop_info) data->cost[vect_prologue] = 0; data->cost[vect_body] = 0; data->cost[vect_epilogue] = 0; + rs6000_vect_nonmem = false; return data; } @@ -5907,6 +5912,15 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, retval = (unsigned) (count * stmt_cost); cost_data->cost[where] += retval; + + /* Check whether we're doing something other than just a copy loop. + Not all such loops may be profitably vectorized; see + rs6000_finish_cost. 
*/ + if ((kind == vec_to_scalar || kind == vec_perm + || kind == vec_promote_demote || kind == vec_construct + || kind == scalar_to_vec) + || (where == vect_body && kind == vector_stmt)) + rs6000_vect_nonmem = true; } return retval; @@ -5923,6 +5937,19 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost, if (cost_data->loop_info) rs6000_density_test (cost_data); + /* Don't vectorize minimum-vectorization-factor, simple copy loops + that require versioning for any reason. The vectorization is at + best a wash inside the loop, and the versioning checks make + profitability highly unlikely and potentially quite harmful. */ + if (cost_data->loop_info) + { + loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); + if (!rs6000_vect_nonmem + && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 + && LOOP_REQUIRES_VERSIONING (vec_info)) + cost_data->cost[vect_body] += 10000; + } + *prologue_cost = cost_data->cost[vect_prologue]; *body_cost = cost_data->cost[vect_body]; *epilogue_cost = cost_data->cost[vect_epilogue]; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5cd1286..bec90e3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * gcc.target/powerpc/versioned-copy-loop.c: New file. + 2017-05-05 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/79038 diff --git a/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c new file mode 100644 index 0000000..bbfd165 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ + +/* Verify that a pure copy loop with a vectorization factor of two + that requires alignment will not be vectorized. See the cost + model hooks in rs6000.c. 
*/ + +typedef long unsigned int size_t; +typedef unsigned char uint8_t; + +extern void *memcpy (void *__restrict __dest, const void *__restrict __src, + size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2))); + +void foo (void *dstPtr, const void *srcPtr, void *dstEnd) +{ + uint8_t *d = (uint8_t*)dstPtr; + const uint8_t *s = (const uint8_t*)srcPtr; + uint8_t* const e = (uint8_t*)dstEnd; + + do + { + memcpy (d, s, 8); + d += 8; + s += 8; + } + while (d < e); +} + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ |