diff options
author | Markus Trippelsdorf <markus@trippelsdorf.de> | 2017-12-16 04:28:08 +0000 |
---|---|---|
committer | Jeff Law <law@gcc.gnu.org> | 2017-12-15 21:28:08 -0700 |
commit | 02308bd3ec458762af1109d0ca6d2be757d555a0 (patch) | |
tree | 5c27f6c88574b704b4ab412f7415a72bdf436ad6 /gcc | |
parent | be528ae9aa5a04611019c42f4d94fed7e4727dad (diff) | |
download | gcc-02308bd3ec458762af1109d0ca6d2be757d555a0.zip gcc-02308bd3ec458762af1109d0ca6d2be757d555a0.tar.gz gcc-02308bd3ec458762af1109d0ca6d2be757d555a0.tar.bz2 |
re PR target/83358 (division not converted with Intel tuning since r253934)
2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
PR target/83358
* config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase
div/mod latencies a bit.
PR target/83358
* gcc.target/i386/pr83358-1.c: New test.
* gcc.target/i386/pr83358-2.c: New test.
From-SVN: r255739
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune-costs.h | 18 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr83358-1.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr83358-2.c | 31 |
5 files changed, 84 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4528b6d..c2d037a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
+
+	PR target/83358
+	* config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase
+	div/mod latencies a bit.
+
 2017-12-15 Jeff Law <law@redhat.com>
 
 	PR tree-optimization/36550
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 312467d..64821933 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1541,9 +1541,11 @@ struct processor_costs skylake_cost = {
    COSTS_N_INSNS (4), /* DI */
    COSTS_N_INSNS (4)}, /* other */
   0, /* cost of multiply per each bit set */
-  {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
-   COSTS_N_INSNS (8), /* HI */
-   COSTS_N_INSNS (11), /* SI */
+  /* Expanding div/mod currently doesn't consider parallelism. So the cost
+     model is not realistic. We compensate by increasing the latencies a bit. */
+  {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+   COSTS_N_INSNS (11), /* HI */
+   COSTS_N_INSNS (14), /* SI */
    COSTS_N_INSNS (76), /* DI */
    COSTS_N_INSNS (76)}, /* other */
   COSTS_N_INSNS (1), /* cost of movsx */
@@ -2342,11 +2344,11 @@ struct processor_costs core_cost = {
    COSTS_N_INSNS (4), /* DI */
    COSTS_N_INSNS (4)}, /* other */
   0, /* cost of multiply per each bit set */
-  {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
-   COSTS_N_INSNS (8), /* HI */
-   /* 8-11 */
-   COSTS_N_INSNS (11), /* SI */
-   /* 24-81 */
+  /* Expanding div/mod currently doesn't consider parallelism. So the cost
+     model is not realistic. We compensate by increasing the latencies a bit. */
+  {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+   COSTS_N_INSNS (11), /* HI */
+   COSTS_N_INSNS (14), /* SI */
    COSTS_N_INSNS (81), /* DI */
    COSTS_N_INSNS (81)}, /* other */
   COSTS_N_INSNS (1), /* cost of movsx */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5c91661..10dc8b2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
+
+	PR target/83358
+	* gcc.target/i386/pr83358-1.c: New test.
+	* gcc.target/i386/pr83358-2.c: New test.
+
 2017-12-15 Jeff Law <law@redhat.com>
 
 	PR tree-optimization/36550
diff --git a/gcc/testsuite/gcc.target/i386/pr83358-1.c b/gcc/testsuite/gcc.target/i386/pr83358-1.c
new file mode 100644
index 0000000..96427b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83358-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) {
+  const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000;
+  int64_t hix = val / POW10_10;
+  int64_t lox = val % POW10_10;
+  int32_t v0 = hix / 100000;
+  int32_t v1 = hix % 100000;
+  int32_t v2 = lox / 100000;
+  int32_t v3 = lox % 100000;
+  for (int i = 4; i != 0; --i) {
+    dst[i + 0 * 5] = v0 % 10 + '0';
+    v0 /= 10;
+    dst[i + 1 * 5] = v1 % 10 + '0';
+    v1 /= 10;
+    dst[i + 2 * 5] = v2 % 10 + '0';
+    v2 /= 10;
+    dst[i + 3 * 5] = v3 % 10 + '0';
+    v3 /= 10;
+  }
+  dst[0 * 5] = v0 + '0';
+  dst[1 * 5] = v1 + '0';
+  dst[2 * 5] = v2 + '0';
+  dst[3 * 5] = v3 + '0';
+  dst[4 * 5] = 0;
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr83358-2.c b/gcc/testsuite/gcc.target/i386/pr83358-2.c
new file mode 100644
index 0000000..f6039bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83358-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=skylake-avx512" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) {
+  const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000;
+  int64_t hix = val / POW10_10;
+  int64_t lox = val % POW10_10;
+  int32_t v0 = hix / 100000;
+  int32_t v1 = hix % 100000;
+  int32_t v2 = lox / 100000;
+  int32_t v3 = lox % 100000;
+  for (int i = 4; i != 0; --i) {
+    dst[i + 0 * 5] = v0 % 10 + '0';
+    v0 /= 10;
+    dst[i + 1 * 5] = v1 % 10 + '0';
+    v1 /= 10;
+    dst[i + 2 * 5] = v2 % 10 + '0';
+    v2 /= 10;
+    dst[i + 3 * 5] = v3 % 10 + '0';
+    v3 /= 10;
+  }
+  dst[0 * 5] = v0 + '0';
+  dst[1 * 5] = v1 + '0';
+  dst[2 * 5] = v2 + '0';
+  dst[3 * 5] = v3 + '0';
+  dst[4 * 5] = 0;
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */