diff options
author | Hans-Peter Nilsson <hp@axis.com> | 2023-04-17 04:54:03 +0200 |
---|---|---|
committer | Hans-Peter Nilsson <hp@bitrange.com> | 2023-05-04 02:46:53 +0200 |
commit | 8c361179c01dccb2abfff1ab447adcbea4429437 (patch) | |
tree | 869884cf09bf7faff22125ee87c5b0285a010fbd /gcc/config/cris | |
parent | e88d9e826ee68219dd4dbb2c0d8bdaee0a864301 (diff) | |
download | gcc-8c361179c01dccb2abfff1ab447adcbea4429437.zip gcc-8c361179c01dccb2abfff1ab447adcbea4429437.tar.gz gcc-8c361179c01dccb2abfff1ab447adcbea4429437.tar.bz2 |
CRIS: peephole2 an "and" with a contiguous "one-sided" sequence of 1s
This kind of transformation seems pretty generic and might be a
candidate for adding to the middle-end, perhaps as part of combine.
I noticed these happened more often for LRA, which is the reason I
went on this track of low-hanging-fruit-microoptimizations that are
such an itch when noticing them, inspecting generated code for libgcc.
Unfortunately, this one improves coremark only by a few cycles at the
beginning or end (<0.0005%) for cris-elf -march=v10. The size of the
coremark code is down by 0.4% (0.22% pre-lra).
Using an iterator from the start because other binary operations will
be added and their define_peephole2's would look exactly the same for
the .md part.
Some existing and-peephole2-related tests suffered, because many of
them were using patterns with only contiguous 1s in them; those were adjusted.
Also, spotted and fixed, by adding a space, some
scan-assembler-strings that were prone to spurious identifier or file
name matches.
gcc:
* config/cris/cris.cc (cris_split_constant): New function.
* config/cris/cris.md (splitop): New iterator.
(opsplit1): New define_peephole2.
* config/cris/cris-protos.h (cris_split_constant): Declare.
(cris_splittable_constant_p): New macro.
gcc/testsuite:
* gcc.target/cris/peep2-andsplit1.c: New test.
* gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c,
gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c:
Adjust values to avoid interference with "opsplit1" with AND. Add
whitespace to match-strings that may be confused with identifiers
or file names.
Diffstat (limited to 'gcc/config/cris')
-rw-r--r-- | gcc/config/cris/cris-protos.h | 6 | ||||
-rw-r--r-- | gcc/config/cris/cris.cc | 78 | ||||
-rw-r--r-- | gcc/config/cris/cris.md | 26 |
3 files changed, 110 insertions, 0 deletions
diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index de9eacb..666e04f 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -44,6 +44,12 @@ extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
 extern rtx_insn *cris_emit_insn (rtx x);
 extern void cris_order_for_addsi3 (rtx *, int);
 extern void cris_emit_trap_for_misalignment (rtx);
+extern int cris_split_constant (HOST_WIDE_INT, enum rtx_code,
+                                machine_mode, bool,
+                                bool generate = false,
+                                rtx dest = NULL_RTX,
+                                rtx op = NULL_RTX);
+#define cris_splittable_constant_p cris_split_constant
 #endif /* RTX_CODE */
 extern void cris_asm_output_label_ref (FILE *, char *);
 extern void cris_asm_output_ident (const char *);
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 496a1a0..44b4545 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2635,6 +2635,84 @@ cris_split_movdx (rtx *operands)
   return val;
 }
 
+/* Try to split the constant WVAL into a number of separate insns of less cost
+   for the rtx operation CODE and the metric SPEED than using WVAL as-is.
+   Generate those insns if GENERATE.  DEST holds the destination, and OP holds
+   the other operand for binary operations; NULL when CODE is SET.  Return the
+   number of insns for the operation or 0 if the constant can't be usefully
+   split (because it's already minimal or is not within range for the known
+   methods).  Only AND is handled so far.  Parts stolen from arm.cc.  */
+
+int
+cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
+                     machine_mode mode, bool speed ATTRIBUTE_UNUSED,
+                     bool generate, rtx dest, rtx op)
+{
+  int32_t ival = (int32_t) wval;
+  uint32_t uval = (uint32_t) wval;
+
+  if (code != AND || IN_RANGE (ival, -32, 31)
+      /* Implemented using movu.[bw] elsewhere.  */
+      || ival == 255 || ival == 65535
+      /* Implemented using clear.[bw] elsewhere.  */
+      || uval == 0xffffff00 || uval == 0xffff0000)
+    return 0;
+
+  int i;
+
+  int msb_zeros = 0;
+  int lsb_zeros = 0;
+
+  /* Count number of leading zeros.  */
+  for (i = 31; i >= 0; i--)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+        msb_zeros++;
+      else
+        break;
+    }
+
+  /* Count number of trailing zeros.  */
+  for (i = 0; i <= 31; i++)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+        lsb_zeros++;
+      else
+        break;
+    }
+
+  /* Is there a lowest or highest part that is zero (but not both)
+     and the non-zero part is just ones?  */
+  if (exact_log2 ((uval >> lsb_zeros) + 1) > 0
+      && (lsb_zeros != 0) != (msb_zeros != 0))
+    {
+      /* If so, we can shift OP in the zero direction into DEST, then shift
+         DEST back (reading DEST, so DEST and OP may differ).  We nominally win
+         nothing for uval < 256, but slottable insns are always beneficial.  */
+      if (generate)
+        {
+          if (mode != SImode)
+            {
+              dest = gen_rtx_REG (SImode, REGNO (dest));
+              op = gen_rtx_REG (SImode, REGNO (op));
+            }
+          if (msb_zeros)
+            {
+              emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros)));
+              emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (msb_zeros)));
+            }
+          else
+            {
+              emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros)));
+              emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (lsb_zeros)));
+            }
+        }
+      return 2;
+    }
+
+  return 0;
+}
+
 /* Try to change a comparison against a constant to be against zero, and
    an unsigned compare against zero to be an equality test.  Beware: only
    valid for compares of integer-type operands.  Also, note that we
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 3796a78..b488224 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -208,6 +208,9 @@
 ;; Ditto, commutative operators (i.e. not minus).
 (define_code_iterator plusumin [plus umin])
 
+;; For opsplit1.
+(define_code_iterator splitop [and])
+
 ;; The addsubbo and nd code-attributes form a hack.  We need to output
 ;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
 ;; need to refer to one iterator from the next.  But, that can't be
@@ -2888,6 +2891,29 @@
   operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode));
 })
 
+;; Large (read: non-quick) numbers can sometimes be AND:ed by other means.
+;; Testcase: gcc.target/cris/peep2-andsplit1.c
+(define_peephole2 ; opsplit1
+  [(parallel
+    [(set (match_operand 0 "register_operand")
+          (splitop
+           (match_operand 1 "register_operand")
+           (match_operand 2 "const_int_operand")))
+     (clobber (reg:CC CRIS_CC0_REGNUM))])]
+  ;; Operands 0 and 1 can be separate identical objects, at least
+  ;; after matching peepholes above.
+  "REGNO (operands[0]) == REGNO (operands[1])
+   && cris_splittable_constant_p (INTVAL (operands[2]), <CODE>,
+                                  GET_MODE (operands[0]),
+                                  optimize_function_for_speed_p (cfun))"
+  [(const_int 0)]
+{
+  cris_split_constant (INTVAL (operands[2]), <CODE>, GET_MODE (operands[0]),
+                       optimize_function_for_speed_p (cfun),
+                       true, operands[0], operands[0]);
+  DONE;
+})
+
 ;; Fix a decomposed szext: fuse it with the memory operand of the
 ;; load.  This is typically the sign-extension part of a decomposed
 ;; "indirect offset" address. |