about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2017-05-05 21:27:54 +0000
committer: William Schmidt <wschmidt@gcc.gnu.org> 2017-05-05 21:27:54 +0000
commit: 9945596cefaa45d13ecab76b4d97ad021bc3a872 (patch)
tree: 2f004c595c0519caa3260985eb7778f48e61ede4
parent: ba82e6b5c83bd38a14ef50de8d7f92d6314fe2b3 (diff)
download: gcc-9945596cefaa45d13ecab76b4d97ad021bc3a872.zip
download: gcc-9945596cefaa45d13ecab76b4d97ad021bc3a872.tar.gz
download: gcc-9945596cefaa45d13ecab76b4d97ad021bc3a872.tar.bz2
rs6000.c (rs6000_vect_nonmem): New static var.
[gcc]

2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>

 * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
 (rs6000_init_cost): Initialize rs6000_vect_nonmem.
 (rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
 (rs6000_finish_cost): Avoid vectorizing simple copy loops with
 VF=2 that require versioning.

[gcc/testsuite]

2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>

 * gcc.target/powerpc/versioned-copy-loop.c: New file.

From-SVN: r247671
-rw-r--r-- gcc/ChangeLog | 8
-rw-r--r-- gcc/config/rs6000/rs6000.c | 27
-rw-r--r-- gcc/testsuite/ChangeLog | 4
-rw-r--r-- gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c | 30
4 files changed, 69 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index aeaa27d..a2f57ac 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
+ (rs6000_init_cost): Initialize rs6000_vect_nonmem.
+ (rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
+ (rs6000_finish_cost): Avoid vectorizing simple copy loops with
+ VF=2 that require versioning.
+
2017-05-05 David Malcolm <dmalcolm@redhat.com>
* diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8f68d84..bac56ab 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5873,6 +5873,10 @@ rs6000_density_test (rs6000_cost_data *data)
/* Implement targetm.vectorize.init_cost. */
+/* For each vectorized loop, this var holds TRUE iff a non-memory vector
+ instruction is needed by the vectorization. */
+static bool rs6000_vect_nonmem;
+
static void *
rs6000_init_cost (struct loop *loop_info)
{
@@ -5881,6 +5885,7 @@ rs6000_init_cost (struct loop *loop_info)
data->cost[vect_prologue] = 0;
data->cost[vect_body] = 0;
data->cost[vect_epilogue] = 0;
+ rs6000_vect_nonmem = false;
return data;
}
@@ -5907,6 +5912,15 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
retval = (unsigned) (count * stmt_cost);
cost_data->cost[where] += retval;
+
+ /* Check whether we're doing something other than just a copy loop.
+ Not all such loops may be profitably vectorized; see
+ rs6000_finish_cost. */
+ if ((kind == vec_to_scalar || kind == vec_perm
+ || kind == vec_promote_demote || kind == vec_construct
+ || kind == scalar_to_vec)
+ || (where == vect_body && kind == vector_stmt))
+ rs6000_vect_nonmem = true;
}
return retval;
@@ -5923,6 +5937,19 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
if (cost_data->loop_info)
rs6000_density_test (cost_data);
+ /* Don't vectorize minimum-vectorization-factor, simple copy loops
+ that require versioning for any reason. The vectorization is at
+ best a wash inside the loop, and the versioning checks make
+ profitability highly unlikely and potentially quite harmful. */
+ if (cost_data->loop_info)
+ {
+ loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
+ if (!rs6000_vect_nonmem
+ && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
+ && LOOP_REQUIRES_VERSIONING (vec_info))
+ cost_data->cost[vect_body] += 10000;
+ }
+
*prologue_cost = cost_data->cost[vect_prologue];
*body_cost = cost_data->cost[vect_body];
*epilogue_cost = cost_data->cost[vect_epilogue];
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5cd1286..bec90e3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2017-05-05 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/versioned-copy-loop.c: New file.
+
2017-05-05 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/79038
diff --git a/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c
new file mode 100644
index 0000000..bbfd165
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+
+/* Verify that a pure copy loop with a vectorization factor of two
+ that requires alignment will not be vectorized. See the cost
+ model hooks in rs6000.c. */
+
+typedef long unsigned int size_t;
+typedef unsigned char uint8_t;
+
+extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
+ size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2)));
+
+void foo (void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+ uint8_t *d = (uint8_t*)dstPtr;
+ const uint8_t *s = (const uint8_t*)srcPtr;
+ uint8_t* const e = (uint8_t*)dstEnd;
+
+ do
+ {
+ memcpy (d, s, 8);
+ d += 8;
+ s += 8;
+ }
+ while (d < e);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */