aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorHans-Peter Nilsson <hp@axis.com>2023-04-20 07:14:26 +0200
committerHans-Peter Nilsson <hp@bitrange.com>2023-05-06 01:15:30 +0200
commit0a5e8d492a7efc739110289bf97ef4c9fe0674f4 (patch)
treeaac4d5968888f59b74b17ffdf8de972bc6c57543 /gcc
parent07527e3eabb00ada0e7c9e083084e4e56d97f34f (diff)
downloadgcc-0a5e8d492a7efc739110289bf97ef4c9fe0674f4.zip
gcc-0a5e8d492a7efc739110289bf97ef4c9fe0674f4.tar.gz
gcc-0a5e8d492a7efc739110289bf97ef4c9fe0674f4.tar.bz2
CRIS: peephole2 a lsrq into a lslq+lsrq pair
Observed after opsplit1 with AND in libgcc floating-point functions, like the first spottings of opsplit1/AND opportunities. Two patterns are nominally needed, as the peephole2 optimizer continues from the *first replacement* insn, not from a minimum context for general matching; one that includes it as the last match. But, the "free-standing" opportunity (three shifts) didn't match by itself in a gcc build of libraries plus running the test-suite, and thus deemed uninteresting and left out. (As expected; if it had matched, that'd have indicated a previously missed optimization or other problem elsewhere.) Only the one that includes the previous define_peephole2 that may generate the sequence (i.e. opsplit1/AND), matches easily. Coremark results aren't impressive though: 0.003% improvement in speed and slightly less than 0.1% in size. A testcase is added to match and another one to cover a case of movulsr checking that it's used; it's preferable to lsrandsplit when both would match. gcc: * config/cris/cris.md (lsrandsplit1): New define_peephole2. gcc/testsuite: * gcc.target/cris/peep2-lsrandsplit1.c, gcc.target/cris/peep2-movulsr2.c: New tests.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/cris/cris.md53
-rw-r--r--gcc/testsuite/gcc.target/cris/peep2-lsrandsplit1.c19
-rw-r--r--gcc/testsuite/gcc.target/cris/peep2-movulsr2.c19
3 files changed, 91 insertions, 0 deletions
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index b488224..e47787f 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -2690,6 +2690,59 @@
= INTVAL (operands[2]) <= 0xff ? GEN_INT (0xff) : GEN_INT (0xffff);
})
+;; Avoid, after opsplit1 with AND (below), sequences of:
+;; lsrq N,R
+;; lslq M,R
+;; lsrq M,R
+;; (N < M), where we can fold the first lsrq into the lslq-lsrq, like:
+;; lslq M-N,R
+;; lsrq M,R
+;; We have to match this before opsplit1 below and before other peephole2s of
+;; lesser value, since peephole2 matching resumes at the first generated insn,
+;; and thus wouldn't match a pattern of the three shifts after opsplit1/AND.
+;; Note that this lsrandsplit1 is in turn of lesser value than movulsr, since
+;; that one doesn't require the same operand for source and destination, but
+;; they happen to be the same hard-register at peephole2 time even if
+;; naturally separated like in peep2-movulsr2.c, thus this placement. (Source
+;; and destination will be re-separated and the move optimized out in
+;; cprop_hardreg at time of this writing.)
+;; Testcase: gcc.target/cris/peep2-lsrandsplit1.c
+(define_peephole2 ; lsrandsplit1
+ [(parallel ; Insn 1: lsrq N,R, where N is operand 2.
+ [(set (match_operand:SI 0 "register_operand")
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel ; Insn 2: AND of the same register with constant operand 5.
+ [(set (match_operand 3 "register_operand")
+ (and
+ (match_operand 4 "register_operand")
+ (match_operand 5 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && (INTVAL (operands[2])
+ < (clz_hwi (INTVAL (operands[5])) - (HOST_BITS_PER_WIDE_INT - 32)))
+ && cris_splittable_constant_p (INTVAL (operands[5]), AND, SImode,
+ optimize_function_for_speed_p (cfun)) == 2"
+ ;; We're guaranteed by the above clz_hwi test (certainly non-zero) and the
+ ;; test for a two-insn return-value from cris_splittable_constant_p, that
+ ;; the cris_splittable_constant_p AND-replacement would be lslq-lsrq.
+ [(parallel ; Replacement 1: lslq M-N,R, with M-N in operand 9.
+ [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 9)))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel ; Replacement 2: lsrq M,R, with M in operand 10.
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 10)))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+{
+ HOST_WIDE_INT shiftval
+ = clz_hwi (INTVAL (operands[5])) - (HOST_BITS_PER_WIDE_INT - 32);
+ operands[9] = GEN_INT (shiftval - INTVAL (operands[2]));
+ operands[10] = GEN_INT (shiftval);
+})
+
;; Testcase for the following four peepholes: gcc.target/cris/peep2-xsrand.c
(define_peephole2 ; asrandb
diff --git a/gcc/testsuite/gcc.target/cris/peep2-lsrandsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-lsrandsplit1.c
new file mode 100644
index 0000000..0da6453
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/peep2-lsrandsplit1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-final { scan-assembler-not " and" } } */
+/* { dg-final { scan-assembler-times "lsrq " 2 } } */
+/* { dg-final { scan-assembler-times "lslq " 2 } } */
+/* { dg-options "-O2" } */
+
+/* Test the "lsrandsplit1" peephole2 trivially. */
+
+unsigned int
+andwlsr (unsigned int x)
+{
+ return (x >> 17) & 0x7ff; /* Unsigned x: shift right by 17, then keep the low 11 bits.  */
+}
+
+int
+andwasr (int x)
+{
+ return (x >> 17) & 0x7ff; /* Same shift count and mask as andwlsr, but with a signed operand.  */
+}
diff --git a/gcc/testsuite/gcc.target/cris/peep2-movulsr2.c b/gcc/testsuite/gcc.target/cris/peep2-movulsr2.c
new file mode 100644
index 0000000..4696e71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/peep2-movulsr2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-final { scan-assembler "movu.w " } } */
+/* { dg-final { scan-assembler "movu.b " } } */
+/* { dg-final { scan-assembler-not "and.. " } } */
+/* { dg-options "-O2" } */
+
+/* Test the "movulsrb", "movulsrw" peephole2:s trivially. */
+
+unsigned int
+movulsrb (unsigned y, unsigned int x) /* y is unused; presumably it only keeps x out of the return-value register -- confirm.  */
+{
+ return (x & 255) >> 1; /* Keep the low 8 bits, then shift right by 1.  */
+}
+
+unsigned int
+movulsrw (unsigned y, unsigned int x) /* y is unused, as in movulsrb.  */
+{
+ return (x & 65535) >> 4; /* Keep the low 16 bits, then shift right by 4.  */
+}