aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/cris
diff options
context:
space:
mode:
authorHans-Peter Nilsson <hp@axis.com>2023-04-17 04:54:03 +0200
committerHans-Peter Nilsson <hp@bitrange.com>2023-05-04 02:46:53 +0200
commit8c361179c01dccb2abfff1ab447adcbea4429437 (patch)
tree869884cf09bf7faff22125ee87c5b0285a010fbd /gcc/config/cris
parente88d9e826ee68219dd4dbb2c0d8bdaee0a864301 (diff)
downloadgcc-8c361179c01dccb2abfff1ab447adcbea4429437.zip
gcc-8c361179c01dccb2abfff1ab447adcbea4429437.tar.gz
gcc-8c361179c01dccb2abfff1ab447adcbea4429437.tar.bz2
CRIS: peephole2 an "and" with a contiguous "one-sided" sequence of 1s
This kind of transformation seems pretty generic and might be a candidate for adding to the middle-end, perhaps as part of combine. I noticed these happened more often for LRA, which is the reason I went on this track of low-hanging-fruit-microoptimizations that are such an itch when noticing them, inspecting generated code for libgcc. Unfortunately, this one improves coremark only by a few cycles at the beginning or end (<0.0005%) for cris-elf -march=v10. The size of the coremark code is down by 0.4% (0.22% pre-lra). Using an iterator from the start because other binary operations will be added and their define_peephole2's would look exactly the same for the .md part. Some existing and-peephole2-related tests suffered, because many of them were using patterns with only contiguous 1:s in them: adjusted. Also, spotted and fixed, by adding a space, some scan-assembler-strings that were prone to spurious identifier or file name matches. gcc: * config/cris/cris.cc (cris_split_constant): New function. * config/cris/cris.md (splitop): New iterator. (opsplit1): New define_peephole2. * config/cris/cris-protos.h (cris_split_constant): Declare. (cris_splittable_constant_p): New macro. gcc/testsuite: * gcc.target/cris/peep2-andsplit1.c: New test. * gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c, gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c: Adjust values to avoid interference with "opsplit1" with AND. Add whitespace to match-strings that may be confused with identifiers or file names.
Diffstat (limited to 'gcc/config/cris')
-rw-r--r--gcc/config/cris/cris-protos.h6
-rw-r--r--gcc/config/cris/cris.cc78
-rw-r--r--gcc/config/cris/cris.md26
3 files changed, 110 insertions, 0 deletions
diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index de9eacb..666e04f 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -44,6 +44,12 @@ extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
extern rtx_insn *cris_emit_insn (rtx x);
extern void cris_order_for_addsi3 (rtx *, int);
extern void cris_emit_trap_for_misalignment (rtx);
+extern int cris_split_constant (HOST_WIDE_INT, enum rtx_code,
+ machine_mode, bool,
+ bool generate = false, /* True: emit the insns, not just count them.  */
+ rtx dest = NULL_RTX, /* Destination; used only when generating.  */
+ rtx op = NULL_RTX); /* Other operand; used only when generating.  */
+#define cris_splittable_constant_p cris_split_constant /* Predicate-style name; with the trailing arguments defaulted nothing is emitted.  */
#endif /* RTX_CODE */
extern void cris_asm_output_label_ref (FILE *, char *);
extern void cris_asm_output_ident (const char *);
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 496a1a0..44b4545 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2635,6 +2635,84 @@ cris_split_movdx (rtx *operands)
return val;
}
+/* Try to split the constant WVAL into a number of separate insns of less cost
+   for the rtx operation CODE and the metric SPEED than using WVAL as-is.
+   Generate those insns if GENERATE.  DEST holds the destination, and OP holds
+   the other operand for binary operations; NULL when CODE is SET.  Return the
+   number of insns for the operation or 0 if the constant can't be usefully
+   split (because it's already minimal or is not within range for the known
+   methods).  Parts stolen from arm.cc.
+
+   At present only AND is handled, and only for a mask that is a single run
+   of ones touching exactly one end of the 32-bit word; such a mask becomes
+   a pair of opposite shifts.  MODE is consulted only when generating, to
+   re-express DEST and OP as SImode registers.  SPEED is currently unused.  */
+
+int
+cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
+                     machine_mode mode, bool speed ATTRIBUTE_UNUSED,
+                     bool generate, rtx dest, rtx op)
+{
+  int32_t ival = (int32_t) wval;
+  uint32_t uval = (uint32_t) wval;
+
+  if (code != AND || IN_RANGE (ival, -32, 31)
+      /* Implemented using movu.[bw] elsewhere.  */
+      || ival == 255 || ival == 65535
+      /* Implemented using clear.[bw] elsewhere.  */
+      || uval == 0xffffff00 || uval == 0xffff0000)
+    return 0;
+
+  /* UVAL is nonzero from here on (zero is within the -32..31 range rejected
+     above), so both counts below stay at 31 or less.  */
+  int i;
+
+  int msb_zeros = 0;
+  int lsb_zeros = 0;
+
+  /* Count number of leading zeros.  The probe bit is built from an unsigned
+     one so that testing bit 31 does not shift into the sign bit of an
+     int.  */
+  for (i = 31; i >= 0; i--)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+        msb_zeros++;
+      else
+        break;
+    }
+
+  /* Count number of trailing zeros.  */
+  for (i = 0; i <= 31; i++)
+    {
+      if ((uval & ((uint32_t) 1 << i)) == 0)
+        lsb_zeros++;
+      else
+        break;
+    }
+
+  /* Is there a lowest or highest part that is zero (but not both)
+     and the non-zero part is just ones?  */
+  if (exact_log2 ((uval >> lsb_zeros) + 1) > 0
+      && (lsb_zeros != 0) != (msb_zeros != 0))
+    {
+      /* If so, we can shift OP in the zero direction, then back.  We don't
+         nominally win anything for uval < 256, except that the insns are split
+         into slottable insns so it's always beneficial.  */
+      if (generate)
+        {
+          if (mode != SImode)
+            {
+              dest = gen_rtx_REG (SImode, REGNO (dest));
+              op = gen_rtx_REG (SImode, REGNO (op));
+            }
+
+          /* The second shift has to read the result of the first, which is
+             in DEST; reading OP again is only right when both name the same
+             register.  */
+          if (msb_zeros)
+            {
+              emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros)));
+              emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (msb_zeros)));
+            }
+          else
+            {
+              emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros)));
+              emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (lsb_zeros)));
+            }
+        }
+      return 2;
+    }
+
+  return 0;
+}
+
/* Try to change a comparison against a constant to be against zero, and
an unsigned compare against zero to be an equality test. Beware:
only valid for compares of integer-type operands. Also, note that we
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 3796a78..b488224 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -208,6 +208,9 @@
;; Ditto, commutative operators (i.e. not minus).
(define_code_iterator plusumin [plus umin])
+;; Codes handled by the opsplit1 define_peephole2; currently just AND.
+(define_code_iterator splitop [and])
+
;; The addsubbo and nd code-attributes form a hack. We need to output
;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
;; need to refer to one iterator from the next. But, that can't be
@@ -2888,6 +2891,29 @@
operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode));
})
+;; Large (read: non-quick) numbers can sometimes be AND:ed by other means,
+;; see cris_split_constant.  Testcase: gcc.target/cris/peep2-andsplit1.c
+(define_peephole2 ; opsplit1
+ [(parallel
+ [(set (match_operand 0 "register_operand")
+ (splitop
+ (match_operand 1 "register_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; Operands 0 and 1 can be separate identical objects, at least after
+ ;; matching peepholes above, so compare register numbers, not the rtxes.
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && cris_splittable_constant_p (INTVAL (operands[2]), <CODE>,
+ GET_MODE (operands[0]),
+ optimize_function_for_speed_p (cfun))"
+ [(const_int 0)]
+{
+ cris_split_constant (INTVAL (operands[2]), <CODE>, GET_MODE (operands[0]),
+ optimize_function_for_speed_p (cfun),
+ true, operands[0], operands[0]);
+ DONE;
+})
+
;; Fix a decomposed szext: fuse it with the memory operand of the
;; load. This is typically the sign-extension part of a decomposed
;; "indirect offset" address.