diff options
author | Andreas Krebbel <krebbel@linux.vnet.ibm.com> | 2017-01-05 10:03:01 +0000 |
---|---|---|
committer | Andreas Krebbel <krebbel@gcc.gnu.org> | 2017-01-05 10:03:01 +0000 |
commit | f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff (patch) | |
tree | e6245aa60b9f5216e1a26cbf9d907172f30404f1 /gcc | |
parent | 8597cd335e507057e7df5dcc0c157cbd9a5bcbdd (diff) | |
download | gcc-f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff.zip gcc-f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff.tar.gz gcc-f5a537e3905a9a098f8f4acaa8cd8e4fc1be07ff.tar.bz2 |
S/390: Unroll mvc loop for memcpy with small constant lengths.
See the memset unrolling patch. The very same applies to memcpys with
constant lengths.
2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* config/s390/s390.c (s390_expand_movmem): Unroll MVC loop for
small constant length operands.
gcc/testsuite/ChangeLog:
2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* gcc.target/s390/memcpy-1.c: New test.
From-SVN: r244098
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/s390/s390.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/s390/memcpy-1.c | 53 |
3 files changed, 75 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 1266f45..9bd98eb 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -5246,10 +5246,25 @@ s390_expand_movmem (rtx dst, rtx src, rtx len)
       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
     return false;
 
-  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
+  /* Expand memcpy for constant length operands without a loop if it
+     is shorter that way.
+
+     With a constant length argument a
+     memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
+  if (GET_CODE (len) == CONST_INT
+      && INTVAL (len) >= 0
+      && INTVAL (len) <= 256 * 6
+      && (!TARGET_MVCLE || INTVAL (len) <= 256))
     {
-      if (INTVAL (len) > 0)
-        emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
+      HOST_WIDE_INT o, l;
+
+      for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
+        {
+          rtx newdst = adjust_address (dst, BLKmode, o);
+          rtx newsrc = adjust_address (src, BLKmode, o);
+          emit_insn (gen_movmem_short (newdst, newsrc,
+                                       GEN_INT (l > 256 ? 255 : l - 1)));
+        }
     }
 
   else if (TARGET_MVCLE)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 16b27e6..46734e9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+	* gcc.target/s390/memcpy-1.c: New test.
+
 2017-01-04 Jeff Law <law@redhat.com>
 
 	PR tree-optimization/78812
diff --git a/gcc/testsuite/gcc.target/s390/memcpy-1.c b/gcc/testsuite/gcc.target/s390/memcpy-1.c
new file mode 100644
index 0000000..58c1b49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/memcpy-1.c
@@ -0,0 +1,53 @@
+/* Make sure that short memcpy's with constant length are emitted
+   without loop statements. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch" } */
+
+/* 3 MVCs */
+void
+*memcpy1(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 700);
+}
+
+/* NOP */
+void
+*memcpy2(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 0);
+}
+
+/* 1 MVC */
+void
+*memcpy3(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 256);
+}
+
+/* 2 MVCs */
+void
+*memcpy4(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 512);
+}
+
+/* 3 MVCs */
+void
+*memcpy5(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 768);
+}
+
+/* Loop with 2 MVCs */
+void
+*memcpy6(void *dest, const void *src)
+{
+  return __builtin_memcpy (dest, src, 1537);
+}
+
+/* memcpy6 uses a loop - check for the two load address instructions
+   used to increment src and dest. */
+/* { dg-final { scan-assembler-times "la" 2 } } */
+
+/* { dg-final { scan-assembler-times "mvc" 11 } } */