aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Krebbel <krebbel@linux.vnet.ibm.com>2017-01-05 10:03:01 +0000
committerAndreas Krebbel <krebbel@gcc.gnu.org>2017-01-05 10:03:01 +0000
commitf5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff (patch)
treee6245aa60b9f5216e1a26cbf9d907172f30404f1
parent8597cd335e507057e7df5dcc0c157cbd9a5bcbdd (diff)
downloadgcc-f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff.zip
gcc-f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff.tar.gz
gcc-f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff.tar.bz2
S/390: Unroll mvc loop for memcpy with small constant lengths.
As with the memset unrolling patch, memcpys with small constant lengths are now expanded into a straight sequence of MVC instructions instead of a loop. 2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com> * config/s390/s390.c (s390_expand_movmem): Unroll MVC loop for small constant length operands. gcc/testsuite/ChangeLog: 2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com> * gcc.target/s390/memcpy-1.c: New test. From-SVN: r244098
-rw-r--r--gcc/config/s390/s390.c21
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/s390/memcpy-1.c53
3 files changed, 75 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 1266f45..9bd98eb 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -5246,10 +5246,25 @@ s390_expand_movmem (rtx dst, rtx src, rtx len)
&& (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
return false;
- if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
+ /* Expand memcpy for constant length operands without a loop if it
+ is shorter that way.
+
+ With a constant length argument a memcpy loop (without pfd) is 36
+ bytes, i.e. the same size as 6 mvc instructions (6 bytes each). */
+ if (GET_CODE (len) == CONST_INT
+ && INTVAL (len) >= 0
+ && INTVAL (len) <= 256 * 6
+ && (!TARGET_MVCLE || INTVAL (len) <= 256))
{
- if (INTVAL (len) > 0)
- emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
+ HOST_WIDE_INT o, l;
+
+ for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
+ {
+ rtx newdst = adjust_address (dst, BLKmode, o);
+ rtx newsrc = adjust_address (src, BLKmode, o);
+ emit_insn (gen_movmem_short (newdst, newsrc,
+ GEN_INT (l > 256 ? 255 : l - 1)));
+ }
}
else if (TARGET_MVCLE)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 16b27e6..46734e9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+ * gcc.target/s390/memcpy-1.c: New test.
+
2017-01-04 Jeff Law <law@redhat.com>
PR tree-optimization/78812
diff --git a/gcc/testsuite/gcc.target/s390/memcpy-1.c b/gcc/testsuite/gcc.target/s390/memcpy-1.c
new file mode 100644
index 0000000..58c1b49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/memcpy-1.c
@@ -0,0 +1,53 @@
+/* Make sure that short memcpy's with constant length are emitted
+ without loop statements. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch" } */
+
+/* 700 bytes = 256 + 256 + 188: expanded without a loop into 3 MVCs.  */
+void
+*memcpy1(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 700);
+}
+
+/* Zero length: the unrolling loop emits nothing, so no MVC (NOP).  */
+void
+*memcpy2(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 0);
+}
+
+/* 256 bytes is the most a single chunk covers: exactly 1 MVC.  */
+void
+*memcpy3(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 256);
+}
+
+/* 512 bytes = 2 * 256: 2 MVCs, no loop.  */
+void
+*memcpy4(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 512);
+}
+
+/* 768 bytes = 3 * 256: 3 MVCs, no loop.  */
+void
+*memcpy5(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 768);
+}
+
+/* 1537 bytes is one above the 256 * 6 unrolling limit: loop with 2 MVCs.  */
+void
+*memcpy6(void *dest, const void *src)
+{
+ return __builtin_memcpy (dest, src, 1537);
+}
+
+/* memcpy6 uses a loop - check for the two load address instructions
+ used to increment src and dest. */
+/* { dg-final { scan-assembler-times "la" 2 } } */
+/* Expected mvc total over memcpy1..memcpy6: 3 + 0 + 1 + 2 + 3 + 2 = 11.  */
+/* { dg-final { scan-assembler-times "mvc" 11 } } */