aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/cris/cris.cc
diff options
context:
space:
mode:
authorHans-Peter Nilsson <hp@axis.com>2023-04-17 04:54:03 +0200
committerHans-Peter Nilsson <hp@bitrange.com>2023-05-04 02:46:53 +0200
commit8c361179c01dccb2abfff1ab447adcbea4429437 (patch)
tree869884cf09bf7faff22125ee87c5b0285a010fbd /gcc/config/cris/cris.cc
parente88d9e826ee68219dd4dbb2c0d8bdaee0a864301 (diff)
downloadgcc-8c361179c01dccb2abfff1ab447adcbea4429437.zip
gcc-8c361179c01dccb2abfff1ab447adcbea4429437.tar.gz
gcc-8c361179c01dccb2abfff1ab447adcbea4429437.tar.bz2
CRIS: peephole2 an "and" with a contiguous "one-sided" sequence of 1s
This kind of transformation seems pretty generic and might be a candidate for adding to the middle-end, perhaps as part of combine. I noticed these happened more often for LRA, which is the reason I went on this track of low-hanging-fruit-microoptimizations that are such an itch when noticing them, inspecting generated code for libgcc. Unfortunately, this one improves coremark only by a few cycles at the beginning or end (<0.0005%) for cris-elf -march=v10. The size of the coremark code is down by 0.4% (0.22% pre-lra). Using an iterator from the start because other binary operations will be added and their define_peephole2's would look exactly the same for the .md part. Some existing and-peephole2-related tests suffered, because many of them were using patterns with only contiguous 1:s in them: adjusted. Also, spotted and fixed, by adding a space, some scan-assembler-strings that were prone to spurious identifier or file name matches. gcc: * config/cris/cris.cc (cris_split_constant): New function. * config/cris/cris.md (splitop): New iterator. (opsplit1): New define_peephole2. * config/cris/cris-protos.h (cris_split_constant): Declare. (cris_splittable_constant_p): New macro. gcc/testsuite: * gcc.target/cris/peep2-andsplit1.c: New test. * gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c, gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c: Adjust values to avoid interference with "opsplit1" with AND. Add whitespace to match-strings that may be confused with identifiers or file names.
Diffstat (limited to 'gcc/config/cris/cris.cc')
-rw-r--r--gcc/config/cris/cris.cc78
1 files changed, 78 insertions, 0 deletions
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 496a1a0..44b4545 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2635,6 +2635,84 @@ cris_split_movdx (rtx *operands)
return val;
}
+/* Try to split the constant WVAL into a number of separate insns of less cost
+   for the rtx operation CODE and the metric SPEED than using WVAL as-is.
+   Generate those insns if GENERATE.  DEST holds the destination, and OP holds
+   the other operand for binary operations; NULL when CODE is SET.  Return the
+   number of insns for the operation or 0 if the constant can't be usefully
+   split (because it's already minimal or is not within range for the known
+   methods).  Parts stolen from arm.cc.
+
+   Currently only CODE == AND is handled, and only the low 32 bits of WVAL
+   are examined.  The one recognized shape is a mask whose 1-bits form a
+   single contiguous run that reaches exactly one end of the 32-bit word;
+   that is done as two shifts, so the return value is then 2.  SPEED is
+   unused.  MODE matters only when GENERATE: for a MODE other than SImode,
+   DEST and OP are replaced by SImode registers with the same register
+   numbers before the shifts are emitted.  */
+
+int
+cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
+                     machine_mode mode, bool speed ATTRIBUTE_UNUSED,
+                     bool generate, rtx dest, rtx op)
+{
+  int32_t ival = (int32_t) wval;
+  uint32_t uval = (uint32_t) wval;
+
+  if (code != AND || IN_RANGE (ival, -32, 31)
+      /* Implemented using movu.[bw] elsewhere.  */
+      || ival == 255 || ival == 65535
+      /* Implemented using clear.[bw] elsewhere.  */
+      || uval == 0xffffff00 || uval == 0xffff0000)
+    return 0;
+
+  /* UVAL is known to be non-zero here (0 is within the -32..31 range
+     rejected above), so both scans below stop at a set bit.  */
+  int msb_zeros = 0;
+  int lsb_zeros = 0;
+
+  /* Count number of leading zeros.  The probe bit is built from an
+     unsigned 32-bit one so testing bit 31 never shifts into the sign bit
+     of an int.  */
+  for (int i = 31; i >= 0 && (uval & ((uint32_t) 1 << i)) == 0; i--)
+    msb_zeros++;
+
+  /* Count number of trailing zeros.  */
+  for (int i = 0; i <= 31 && (uval & ((uint32_t) 1 << i)) == 0; i++)
+    lsb_zeros++;
+
+  /* Is there a lowest or highest part that is zero (but not both)
+     and the non-zero part is just ones?  */
+  if (exact_log2 ((uval >> lsb_zeros) + 1) > 0
+      && (lsb_zeros != 0) != (msb_zeros != 0))
+    {
+      /* If so, we can shift OP in the zero direction, then back.  We don't
+         nominally win anything for uval < 256, except that the insns are split
+         into slottable insns so it's always beneficial.  */
+      if (generate)
+        {
+          if (mode != SImode)
+            {
+              dest = gen_rtx_REG (SImode, REGNO (dest));
+              op = gen_rtx_REG (SImode, REGNO (op));
+            }
+
+          /* The second shift must read the intermediate value in DEST, not
+             OP again: the two are interchangeable only when the caller
+             passes the same register for both.  */
+          if (msb_zeros)
+            {
+              emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros)));
+              emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (msb_zeros)));
+            }
+          else
+            {
+              emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros)));
+              emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (lsb_zeros)));
+            }
+        }
+      return 2;
+    }
+
+  return 0;
+}
+
+
/* Try to change a comparison against a constant to be against zero, and
an unsigned compare against zero to be an equality test. Beware:
only valid for compares of integer-type operands. Also, note that we