aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndreas Krebbel <krebbel@linux.ibm.com>2018-08-06 15:01:00 +0000
committerAndreas Krebbel <krebbel@gcc.gnu.org>2018-08-06 15:01:00 +0000
commit8eed4721de54c66953af83791db19d75dd23bce8 (patch)
treebcd08467fe566c274f0bdc073ae37165288290b7 /gcc
parent2f171da9ccea4fc9c7918e22e6485b49626f8730 (diff)
downloadgcc-8eed4721de54c66953af83791db19d75dd23bce8.zip
gcc-8eed4721de54c66953af83791db19d75dd23bce8.tar.gz
gcc-8eed4721de54c66953af83791db19d75dd23bce8.tar.bz2
S/390: Don't unroll memory blk op loops
gcc/ChangeLog: 2018-08-06 Andreas Krebbel <krebbel@linux.ibm.com> * config/s390/s390.c (s390_loop_unroll_adjust): Prevent small loops with memory block operations from getting unrolled. gcc/testsuite/ChangeLog: 2018-08-06 Andreas Krebbel <krebbel@linux.ibm.com> * gcc.target/s390/nomemloopunroll-1.c: New test. From-SVN: r263336
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog5
-rw-r--r--gcc/config/s390/s390.c31
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c27
4 files changed, 64 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c36cffa..b15d546 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2018-08-06 Andreas Krebbel <krebbel@linux.ibm.com>
+
+ * config/s390/s390.c (s390_loop_unroll_adjust): Prevent small
+ loops with memory block operations from getting unrolled.
+
2018-08-06 Ulrich Weigand <uweigand@de.ibm.com>
PR target/86807
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index ec588a2..aa34f56 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -390,6 +390,11 @@ static unsigned vfu_longrunning[NUM_SIDES];
base and index are registers of the class ADDR_REGS,
displacement is an unsigned 12-bit immediate constant. */
+/* The maximum number of insns in backend-generated memset/memcpy/memcmp
+ loops. This value is used in the unroll adjust hook to detect such
+ loops. The current maximum is 9, coming from the memcmp loop. */
+#define BLOCK_MEM_OPS_LOOP_INSNS 9
+
struct s390_address
{
rtx base;
@@ -15385,9 +15390,29 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
for (i = 0; i < loop->num_nodes; i++)
FOR_BB_INSNS (bbs[i], insn)
if (INSN_P (insn) && INSN_CODE (insn) != -1)
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
- if (MEM_P (*iter))
- mem_count += 1;
+ {
+ rtx set;
+
+ /* The runtime of small loops with memory block operations
+ will be determined by the memory operation. Doing
+ unrolling doesn't help here. Measurements to confirm
+ this were only done on recent CPU levels, so it is better
+ not to change anything for older CPUs. */
+ if (s390_tune >= PROCESSOR_2964_Z13
+ && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
+ && ((set = single_set (insn)) != NULL_RTX)
+ && ((GET_MODE (SET_DEST (set)) == BLKmode
+ && (GET_MODE (SET_SRC (set)) == BLKmode
+ || SET_SRC (set) == const0_rtx))
+ || (GET_CODE (SET_SRC (set)) == COMPARE
+ && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
+ && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
+ return 1;
+
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
+ if (MEM_P (*iter))
+ mem_count += 1;
+ }
free (bbs);
/* Prevent division by zero, and we do not need to adjust nunroll in this case. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index fb65bd0..0d8fc74 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2018-08-06 Andreas Krebbel <krebbel@linux.ibm.com>
+
+ * gcc.target/s390/nomemloopunroll-1.c: New test.
+
2018-08-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/86386
diff --git a/gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c b/gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c
new file mode 100644
index 0000000..695d925
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -march=z13" } */
+
+/* 2x mvc */
+void *
+foo (char *a, int c, long len)
+{
+ return __builtin_memset (a, c, len);
+}
+
+/* 2x mvc */
+void
+bar (char *a, char *b)
+{
+ __builtin_memcpy (a, b, 30000);
+}
+
+/* 2x clc */
+
+int
+baz (char *a, char *b)
+{
+ return __builtin_memcmp (a, b, 30000);
+}
+
+/* { dg-final { scan-assembler-times "\\\smvc\\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\\sclc\\\s" 2 } } */