author     Jeff Law <jlaw@ventanamicro.com>  2023-12-10 10:29:23 -0700
committer  Jeff Law <jlaw@ventanamicro.com>  2023-12-10 10:29:23 -0700
commit     73f6e1fe8835085ccc6de5c5f4428d47e853913b (patch)
tree       b16753597fe272f8933cfcdc7e06a83a3ff0cd8a
parent     1f55c5cc698519094f751257db62ff274c015fdc (diff)
[committed] Provide patterns for signed bitfield extractions on H8
Inspired by Roger's work on the ARC port, this patch provides a
define_insn_and_split pattern to optimize sign-extended bitfields
starting at position 0 using an approach that doesn't require shifting.

It then builds on that to provide another define_insn_and_split pattern
to support arbitrary signed bitfield extractions -- it uses a logical
right shift to move the bitfield into position 0, then the specialized
pattern above to sign extend the MSB of the field through the rest of
the register.  This is often, but certainly not always, better than the
two-shift approach.  The code uses the sizes of the two sequences to
select between the two-shift approach and the single shift with
extension from an arbitrary location.

There are certainly further improvements that could be made here, but I
think we're getting the bulk of the improvement already.

Regression tested on the H8 port without errors.  Installing on the
trunk.

gcc/
	* config/h8300/h8300-protos.h (use_extvsi): Prototype.
	* config/h8300/combiner.md: Two new define_insn_and_split patterns
	to implement signed bitfield extractions.
	* config/h8300/h8300.cc (use_extvsi): New function.
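In C terms, the no-shift extraction that the position-0 pattern expands to
looks roughly like the sketch below: mask the field, flip its sign bit with
XOR, then subtract the sign bit back out.  This is only an illustration; the
function and parameter names are not from the patch.

#include <stdint.h>
#include <stdio.h>

/* Sign extend the low WIDTH bits of X without any shifting.  MASK plays
   the role of operands[3] and SIGN the role of operands[4] in the
   *extvsi_n_0 pattern below.  */
static int32_t
sext_low_bits (uint32_t x, unsigned width)
{
  uint32_t mask = ~(~0u << width);
  uint32_t sign = 1u << (width - 1);
  x &= mask;
  x ^= sign;
  return (int32_t) x - (int32_t) sign;
}

int
main (void)
{
  /* 0b10110 interpreted as a 5-bit signed field is -10.  */
  printf ("%d\n", (int) sext_low_bits (0x16, 5));
  return 0;
}

The new combiner.md patterns realize exactly this and/xor/minus sequence at
the RTL level after reload.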
-rw-r--r--  gcc/config/h8300/combiner.md     49
-rw-r--r--  gcc/config/h8300/h8300-protos.h   1
-rw-r--r--  gcc/config/h8300/h8300.cc        64
3 files changed, 113 insertions(+), 1 deletion(-)
diff --git a/gcc/config/h8300/combiner.md b/gcc/config/h8300/combiner.md
index cce1878..d5f26b5 100644
--- a/gcc/config/h8300/combiner.md
+++ b/gcc/config/h8300/combiner.md
@@ -1269,7 +1269,54 @@
;; (pc)))]
;; "")
-;; Various ways to extract a single bit bitfield and sign extend it
+;; This is a signed bitfield extraction starting at bit 0
+;; It's usually faster than using shifts, but not always,
+;; particularly on the H8/S and H8/SX variants.
+(define_insn_and_split "*extvsi_n_0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand")
+ (const_int 0)))]
+ "INTVAL (operands[2]) > 1
+ && INTVAL (operands[2]) < (TARGET_H8300S ? 26 - TARGET_H8300SX : 29)
+ && (!TARGET_H8300SX || (INTVAL (operands[2]) != 24 && INTVAL (operands[2]) != 17))"
+ "#"
+ "&& reload_completed"
+[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 4)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (minus:SI (match_dup 0) (match_dup 4)))
+ (clobber (reg:CC CC_REG))])]
+{
+ int tmp = INTVAL (operands[2]);
+ operands[3] = GEN_INT (~(HOST_WIDE_INT_M1U << tmp));
+ operands[4] = GEN_INT (HOST_WIDE_INT_1U << (tmp - 1));
+})
+
+(define_insn_and_split "*extvsi_n_n"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand")))]
+ "(!h8300_shift_needs_scratch_p (INTVAL (operands[3]), SImode, LSHIFTRT)
+ && use_extvsi (INTVAL (operands[2]), INTVAL (operands[3])))"
+ "#"
+ "&& reload_completed"
+[(parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 4)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 5)))
+ (clobber (reg:CC CC_REG))])
+ (parallel [(set (match_dup 0) (minus:SI (match_dup 0) (match_dup 5)))
+ (clobber (reg:CC CC_REG))])]
+{
+ int tmp = INTVAL (operands[2]);
+ operands[4] = gen_int_mode (~(HOST_WIDE_INT_M1U << tmp), SImode);
+ operands[5] = gen_int_mode (HOST_WIDE_INT_1U << (tmp - 1), SImode);
+})
+
;;
;; Testing showed this only triggering with SImode, probably because
;; of how insv/extv are defined.
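For a field that does not start at bit 0, the second pattern above moves the
field down with one logical right shift and then reuses the same trick.  A
hedged C sketch of that expansion next to the conventional two-shift sequence
it competes with (names and test values are illustrative only, not taken from
the patch):

#include <stdint.h>
#include <assert.h>

/* Roughly what the *extvsi_n_n split expands to: a logical right shift
   by POS, then the mask/xor/subtract sign extension of the low WIDTH
   bits.  */
static int32_t
extract_signed_shift_once (uint32_t x, unsigned pos, unsigned width)
{
  uint32_t mask = ~(~0u << width);
  uint32_t sign = 1u << (width - 1);
  x >>= pos;
  x &= mask;
  x ^= sign;
  return (int32_t) x - (int32_t) sign;
}

/* The conventional alternative: shift the field up to the MSB, then
   arithmetic shift right.  Assumes the usual GCC behaviour of
   two's-complement conversion and arithmetic right shift of signed
   values.  */
static int32_t
extract_signed_two_shifts (uint32_t x, unsigned pos, unsigned width)
{
  int32_t t = (int32_t) (x << (32 - (width + pos)));
  return t >> (32 - width);
}

int
main (void)
{
  /* Extract the 7-bit field at bit 8 of an arbitrary test value.  */
  uint32_t x = 0x00012f40;
  assert (extract_signed_shift_once (x, 8, 7)
	  == extract_signed_two_shifts (x, 8, 7));
  return 0;
}

Which of the two forms the port actually emits is decided by use_extvsi
(added in h8300.cc below), which compares the estimated lengths of the two
sequences.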
diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h
index 3376bd0..96bd0c8 100644
--- a/gcc/config/h8300/h8300-protos.h
+++ b/gcc/config/h8300/h8300-protos.h
@@ -111,5 +111,6 @@ extern const char * output_h8sx_shift (rtx *, int, int);
extern bool h8300_operands_match_p (rtx *);
extern bool h8sx_mergeable_memrefs_p (rtx, rtx);
extern poly_int64 h8300_push_rounding (poly_int64);
+extern bool use_extvsi (int, int);
#endif /* ! GCC_H8300_PROTOS_H */
diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc
index 5f9bbc9..f906286 100644
--- a/gcc/config/h8300/h8300.cc
+++ b/gcc/config/h8300/h8300.cc
@@ -5503,6 +5503,70 @@ h8300_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
}
}
+/* Return true if a signed bitfield extraction with length COUNT
+ starting at position POS should be optimized by first shifting
+ right to put the field in the LSB, then using a 3 operand sequence
+ to sign extend from an arbitrary position. Return false
+ otherwise.
+
+ The basic idea here is to compute the length of each sequence
+ and use that as a proxy for performance. It's not strictly
+ correct on the H8/SX which has variable timed shifts and some
+ lengths may be incorrect, but this is pretty close.
+
+ There may be cases where the length computations are inaccurate
+ which may in turn lead to a sub-optimal sequence, but that
+ should be rare.
+
+ We do not try to balance avoiding a loop with burning an extra
+ couple of bytes. Those could probably be handled with special
+ cases. */
+
+bool
+use_extvsi (int count, int pos)
+{
+ rtx operands[3];
+ operands[0] = gen_rtx_REG (SImode, 0);
+ operands[1] = gen_rtx_REG (SImode, 0);
+
+ /* We have a special sequence to sign extract a single bit
+ object, otherwise compute it as a pair of shifts, first
+ left, then arithmetic right. The cost of that special
+ sequence is 8/10 depending on where the bit is. */
+ unsigned shift_cost;
+ if (count == 1)
+ shift_cost = pos >= 16 ? 10 : 8;
+ else
+ {
+ unsigned lshift = 32 - (count + pos);
+ unsigned rshift = 32 - count;
+ operands[2] = GEN_INT (lshift);
+ shift_cost = compute_a_shift_length (operands, ASHIFT);
+ operands[2] = GEN_INT (rshift);
+ shift_cost += compute_a_shift_length (operands, ASHIFTRT);
+ }
+
+ /* Cost of hopefully optimized sequence. First we logically shift right
+ by an adjusted count. Logicals are generally better than arith,
+ particularly for H8/SX. */
+ operands[2] = GEN_INT (pos);
+ unsigned opt_cost = compute_a_shift_length (operands, LSHIFTRT);
+ operands[2] = gen_int_mode (~(HOST_WIDE_INT_M1U << count), SImode);
+ opt_cost += compute_logical_op_length (SImode, AND, operands, NULL);
+ operands[2] = gen_int_mode (HOST_WIDE_INT_1U << (count - 1), SImode);
+ opt_cost += compute_logical_op_length (SImode, XOR, operands, NULL);
+
+ /* H8/SX has short form subtraction. */
+ if (TARGET_H8300SX && (INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 7))
+ opt_cost += 2;
+ else if (TARGET_H8300SX && (INTVAL (operands[2]) >= 8 && INTVAL (operands[2]) <= 32767))
+ opt_cost += 4;
+ else
+ opt_cost += 6;
+
+ return opt_cost <= shift_cost;
+}
+
/* Implement PUSH_ROUNDING.
On the H8/300, @-sp really pushes a byte if you ask it to - but that's