about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Markus Trippelsdorf <markus@trippelsdorf.de> 2017-12-16 04:28:08 +0000
committer: Jeff Law <law@gcc.gnu.org> 2017-12-15 21:28:08 -0700
commit: 02308bd3ec458762af1109d0ca6d2be757d555a0 (patch)
tree: 5c27f6c88574b704b4ab412f7415a72bdf436ad6 /gcc
parent: be528ae9aa5a04611019c42f4d94fed7e4727dad (diff)
download: gcc-02308bd3ec458762af1109d0ca6d2be757d555a0.zip
          gcc-02308bd3ec458762af1109d0ca6d2be757d555a0.tar.gz
          gcc-02308bd3ec458762af1109d0ca6d2be757d555a0.tar.bz2
re PR target/83358 (division not converted with Intel tuning since r253934)
2017-12-15  Markus Trippelsdorf  <markus@trippelsdorf.de>

    PR target/83358
    * config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase
    div/mod latencies a bit.

    PR target/83358
    * gcc.target/i386/pr83358-1.c: New test.
    * gcc.target/i386/pr83358-2.c: New test.

From-SVN: r255739
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                              |  6
-rw-r--r--  gcc/config/i386/x86-tune-costs.h           | 18
-rw-r--r--  gcc/testsuite/ChangeLog                    |  6
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr83358-1.c  | 31
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr83358-2.c  | 31
5 files changed, 84 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4528b6d..c2d037a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
+
+ PR target/83358
+ * config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase
+ div/mod latencies a bit.
+
2017-12-15 Jeff Law <law@redhat.com>
PR tree-optimization/36550
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 312467d..64821933 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1541,9 +1541,11 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (4), /* DI */
COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (8), /* HI */
- COSTS_N_INSNS (11), /* SI */
+ /* Expanding div/mod currently doesn't consider parallelism. So the cost
+ model is not realistic. We compensate by increasing the latencies a bit. */
+ {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (14), /* SI */
COSTS_N_INSNS (76), /* DI */
COSTS_N_INSNS (76)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
@@ -2342,11 +2344,11 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (4), /* DI */
COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (8), /* HI */
- /* 8-11 */
- COSTS_N_INSNS (11), /* SI */
- /* 24-81 */
+ /* Expanding div/mod currently doesn't consider parallelism. So the cost
+ model is not realistic. We compensate by increasing the latencies a bit. */
+ {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (14), /* SI */
COSTS_N_INSNS (81), /* DI */
COSTS_N_INSNS (81)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5c91661..10dc8b2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
+
+ PR target/83358
+ * gcc.target/i386/pr83358-1.c: New test.
+ * gcc.target/i386/pr83358-2.c: New test.
+
2017-12-15 Jeff Law <law@redhat.com>
PR tree-optimization/36550
diff --git a/gcc/testsuite/gcc.target/i386/pr83358-1.c b/gcc/testsuite/gcc.target/i386/pr83358-1.c
new file mode 100644
index 0000000..96427b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83358-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) {
+ const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000;
+ int64_t hix = val / POW10_10;
+ int64_t lox = val % POW10_10;
+ int32_t v0 = hix / 100000;
+ int32_t v1 = hix % 100000;
+ int32_t v2 = lox / 100000;
+ int32_t v3 = lox % 100000;
+ for (int i = 4; i != 0; --i) {
+ dst[i + 0 * 5] = v0 % 10 + '0';
+ v0 /= 10;
+ dst[i + 1 * 5] = v1 % 10 + '0';
+ v1 /= 10;
+ dst[i + 2 * 5] = v2 % 10 + '0';
+ v2 /= 10;
+ dst[i + 3 * 5] = v3 % 10 + '0';
+ v3 /= 10;
+ }
+ dst[0 * 5] = v0 + '0';
+ dst[1 * 5] = v1 + '0';
+ dst[2 * 5] = v2 + '0';
+ dst[3 * 5] = v3 + '0';
+ dst[4 * 5] = 0;
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr83358-2.c b/gcc/testsuite/gcc.target/i386/pr83358-2.c
new file mode 100644
index 0000000..f6039bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83358-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=skylake-avx512" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) {
+ const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000;
+ int64_t hix = val / POW10_10;
+ int64_t lox = val % POW10_10;
+ int32_t v0 = hix / 100000;
+ int32_t v1 = hix % 100000;
+ int32_t v2 = lox / 100000;
+ int32_t v3 = lox % 100000;
+ for (int i = 4; i != 0; --i) {
+ dst[i + 0 * 5] = v0 % 10 + '0';
+ v0 /= 10;
+ dst[i + 1 * 5] = v1 % 10 + '0';
+ v1 /= 10;
+ dst[i + 2 * 5] = v2 % 10 + '0';
+ v2 /= 10;
+ dst[i + 3 * 5] = v3 % 10 + '0';
+ v3 /= 10;
+ }
+ dst[0 * 5] = v0 + '0';
+ dst[1 * 5] = v1 + '0';
+ dst[2 * 5] = v2 + '0';
+ dst[3 * 5] = v3 + '0';
+ dst[4 * 5] = 0;
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */