diff options
author | Hans-Peter Nilsson <hp@axis.com> | 2023-04-23 06:21:13 +0200 |
---|---|---|
committer | Hans-Peter Nilsson <hp@bitrange.com> | 2023-05-06 02:02:14 +0200 |
commit | 35b7618e3ae25b3a293c456e24edc5432a936e22 (patch) | |
tree | a1cd31855f7f8fba7e4a22dcab41c31fa219727f | |
parent | fe50e419086f5b781b6fe8242741e6213b788337 (diff) | |
download | gcc-35b7618e3ae25b3a293c456e24edc5432a936e22.zip gcc-35b7618e3ae25b3a293c456e24edc5432a936e22.tar.gz gcc-35b7618e3ae25b3a293c456e24edc5432a936e22.tar.bz2 |
CRIS: peephole2 an add into two addq or subq
Unfortunately, this doesn't cause a performance improvement for coremark,
but it happens a few times in newlib, just enough to affect coremark
0.01% by size (or 4 bytes, and three cycles; __fwalk_sglue and
__vfiprintf_r two bytes each).
gcc:
* config/cris/cris.md (splitop): Add PLUS.
* config/cris/cris.cc (cris_split_constant): Also handle
PLUS when a split into two insns may be useful.
gcc/testsuite:
* gcc.target/cris/peep2-addsplit1.c: New test.
-rw-r--r-- | gcc/config/cris/cris.cc | 25 | ||||
-rw-r--r-- | gcc/config/cris/cris.md | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/cris/peep2-addsplit1.c | 52 |
3 files changed, 81 insertions, 2 deletions
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc index 44b4545..1c7289b 100644 --- a/gcc/config/cris/cris.cc +++ b/gcc/config/cris/cris.cc @@ -2651,7 +2651,30 @@ cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code, int32_t ival = (int32_t) wval; uint32_t uval = (uint32_t) wval; - if (code != AND || IN_RANGE(ival, -32, 31) + /* Can we do with two addq or two subq, improving chances of filling a + delay-slot? At worst, we break even, both performance and + size-wise. */ + if (code == PLUS + && (IN_RANGE (ival, -63 * 2, -63 - 1) + || IN_RANGE (ival, 63 + 1, 63 * 2))) + { + if (generate) + { + int sign = ival < 0 ? -1 : 1; + int aval = abs (ival); + + if (mode != SImode) + { + dest = gen_rtx_REG (SImode, REGNO (dest)); + op = gen_rtx_REG (SImode, REGNO (op)); + } + emit_insn (gen_addsi3 (dest, op, GEN_INT (63 * sign))); + emit_insn (gen_addsi3 (dest, op, GEN_INT ((aval - 63) * sign))); + } + return 2; + } + + if (code != AND || IN_RANGE (ival, -32, 31) /* Implemented using movu.[bw] elsewhere. */ || ival == 255 || ival == 65535 /* Implemented using clear.[bw] elsewhere. */ diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md index 9e3fa78c3..7504b63 100644 --- a/gcc/config/cris/cris.md +++ b/gcc/config/cris/cris.md @@ -209,7 +209,7 @@ (define_code_iterator plusumin [plus umin]) ;; For opsplit1. -(define_code_iterator splitop [and]) +(define_code_iterator splitop [and plus]) ;; The addsubbo and nd code-attributes form a hack. We need to output ;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd @@ -2984,6 +2984,10 @@ ;; Large (read: non-quick) numbers can sometimes be AND:ed by other means. ;; Testcase: gcc.target/cris/peep2-andsplit1.c +;; +;; Another case is add<ext> N,rx with -126..-64,64..126: it has the same +;; size and execution time as two addq or subq, but addq and subq can fill +;; a delay-slot. 
(define_peephole2 ; opsplit1 [(parallel [(set (match_operand 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c new file mode 100644 index 0000000..b69c0d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c @@ -0,0 +1,52 @@ +/* Check that "opsplit1" with PLUS does its job. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-leading-underscore" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +int addsi (int x) +{ + return x + 64; +} + +char addqi (char x) +{ + return x + 126; +} + +short addhi (short x) +{ + return x - 64; +} + +unsigned short addhi2 (short x) +{ + return x - 126; +} + +/* +** addsi: +** addq 63,.r10 +** ret +** addq 1,.r10 +*/ + +/* +** addqi: +** addq 63,.r10 +** ret +** addq 63,.r10 +*/ + +/* +** addhi: +** subq 63,.r10 +** ret +** subq 1,.r10 +*/ + +/* +** addhi2: +** subq 63,.r10 +** ret +** subq 63,.r10 +*/ |