From 8c361179c01dccb2abfff1ab447adcbea4429437 Mon Sep 17 00:00:00 2001
From: Hans-Peter Nilsson
Date: Mon, 17 Apr 2023 04:54:03 +0200
Subject: CRIS: peephole2 an "and" with a contiguous "one-sided" sequence of 1s

This kind of transformation seems pretty generic and might be a
candidate for adding to the middle-end, perhaps as part of combine.

I noticed these happened more often for LRA, which is the reason I
went on this track of low-hanging-fruit-microoptimizations that are
such an itch when noticing them, inspecting generated code for libgcc.
Unfortunately, this one improves coremark only by a few cycles at the
beginning or end (<0.0005%) for cris-elf -march=v10.  The size of the
coremark code is down by 0.4% (0.22% pre-lra).

Using an iterator from the start because other binary operations will
be added and their define_peephole2's would look exactly the same for
the .md part.

Some existing and-peephole2-related tests suffered, because many of
them were using patterns with only contiguous 1s in them: adjusted.
Also, spotted and fixed, by adding a space, some
scan-assembler-strings that were prone to spurious identifier or file
name matches.

gcc:
	* config/cris/cris.cc (cris_split_constant): New function.
	* config/cris/cris.md (splitop): New iterator.
	(opsplit1): New define_peephole2.
	* config/cris/cris-protos.h (cris_split_constant): Declare.
	(cris_splittable_constant_p): New macro.

gcc/testsuite:
	* gcc.target/cris/peep2-andsplit1.c: New test.
	* gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c,
	gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c:
	Adjust values to avoid interference with "opsplit1" with AND.
	Add whitespace to match-strings that may be confused with
	identifiers or file names.
--- gcc/config/cris/cris.cc | 78 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'gcc/config/cris/cris.cc') diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc index 496a1a0..44b4545 100644 --- a/gcc/config/cris/cris.cc +++ b/gcc/config/cris/cris.cc @@ -2635,6 +2635,84 @@ cris_split_movdx (rtx *operands) return val; } +/* Try to split the constant WVAL into a number of separate insns of less cost + for the rtx operation CODE and the metric SPEED than using val as-is. + Generate those insns if GENERATE. DEST holds the destination, and OP holds + the other operand for binary operations; NULL when CODE is SET. Return the + number of insns for the operation or 0 if the constant can't be usefully + split (because it's already minimal or is not within range for the known + methods). Parts stolen from arm.cc. */ + +int +cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code, + machine_mode mode, bool speed ATTRIBUTE_UNUSED, + bool generate, rtx dest, rtx op) +{ + int32_t ival = (int32_t) wval; + uint32_t uval = (uint32_t) wval; + + if (code != AND || IN_RANGE(ival, -32, 31) + /* Implemented using movu.[bw] elsewhere. */ + || ival == 255 || ival == 65535 + /* Implemented using clear.[bw] elsewhere. */ + || uval == 0xffffff00 || uval == 0xffff0000) + return 0; + + int i; + + int msb_zeros = 0; + int lsb_zeros = 0; + + /* Count number of leading zeros. */ + for (i = 31; i >= 0; i--) + { + if ((uval & (1 << i)) == 0) + msb_zeros++; + else + break; + } + + /* Count number of trailing zero's. */ + for (i = 0; i <= 31; i++) + { + if ((uval & (1 << i)) == 0) + lsb_zeros++; + else + break; + } + + /* Is there a lowest or highest part that is zero (but not both) + and the non-zero part is just ones? */ + if (exact_log2 ((uval >> lsb_zeros) + 1) > 0 + && (lsb_zeros != 0) != (msb_zeros != 0)) + { + /* If so, we can shift OP in the zero direction, then back. 
We don't + nominally win anything for uval < 256, except that the insns are split + into slottable insns so it's always beneficial. */ + if (generate) + { + if (mode != SImode) + { + dest = gen_rtx_REG (SImode, REGNO (dest)); + op = gen_rtx_REG (SImode, REGNO (op)); + } + if (msb_zeros) + { + emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros))); + emit_insn (gen_lshrsi3 (dest, op, GEN_INT (msb_zeros))); + } + else + { + emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros))); + emit_insn (gen_ashlsi3 (dest, op, GEN_INT (lsb_zeros))); + } + } + return 2; + } + + return 0; +} + /* Try to change a comparison against a constant to be against zero, and an unsigned compare against zero to be an equality test. Beware: only valid for compares of integer-type operands. Also, note that we -- cgit v1.1