aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> 2023-07-31 16:35:33 +0200
committer Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> 2023-07-31 16:35:33 +0200
commit 7cdd0860949c6c3232e6cff1d7ca37bb5234074c (patch)
tree 6cb1a23e54b8af2c47c2cba403e8ad73850e9230
parent 5a770748e8eea38a4ed1f3c4dd8d807aa1640536 (diff)
download gcc-7cdd0860949c6c3232e6cff1d7ca37bb5234074c.zip
gcc-7cdd0860949c6c3232e6cff1d7ca37bb5234074c.tar.gz
gcc-7cdd0860949c6c3232e6cff1d7ca37bb5234074c.tar.bz2
combine: Narrow comparison of memory and constant

Comparisons between memory and constants might be done in a smaller mode, resulting in smaller constants which might finally end up as immediates instead of in the literal pool.

For example, on s390x a non-symmetric comparison like
  x <= 0x3fffffffffffffff
results in the constant being spilled to the literal pool, and an 8-byte memory comparison is emitted. Ideally, an equivalent comparison
  x0 <= 0x3f
where x0 is the most significant byte of x, is emitted instead; its constant is smaller and more likely to materialize as an immediate.

Similarly, comparisons of the form
  x >= 0x4000000000000000
can be shortened into
  x0 >= 0x40.

gcc/ChangeLog:

  * combine.cc (simplify_compare_const): Narrow comparison of memory and constant.
  (try_combine): Adapt to new function signature.
  (simplify_comparison): Adapt to new function signature.

gcc/testsuite/ChangeLog:

  * gcc.dg/cmp-mem-const-1.c: New test.
  * gcc.dg/cmp-mem-const-2.c: New test.
  * gcc.dg/cmp-mem-const-3.c: New test.
  * gcc.dg/cmp-mem-const-4.c: New test.
  * gcc.dg/cmp-mem-const-5.c: New test.
  * gcc.dg/cmp-mem-const-6.c: New test.
  * gcc.target/s390/cmp-mem-const-1.c: New test.

-rw-r--r-- gcc/combine.cc 79
-rw-r--r-- gcc/testsuite/gcc.dg/cmp-mem-const-1.c 17
-rw-r--r-- gcc/testsuite/gcc.dg/cmp-mem-const-2.c 17
-rw-r--r-- gcc/testsuite/gcc.dg/cmp-mem-const-3.c 17
-rw-r--r-- gcc/testsuite/gcc.dg/cmp-mem-const-4.c 17
-rw-r--r-- gcc/testsuite/gcc.dg/cmp-mem-const-5.c 17
-rw-r--r-- gcc/testsuite/gcc.dg/cmp-mem-const-6.c 17
-rw-r--r-- gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c 24
8 files changed, 200 insertions, 5 deletions
diff --git a/gcc/combine.cc b/gcc/combine.cc
index 4bf867d..0d99fa5 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -457,7 +457,7 @@ static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx,
static int recog_for_combine (rtx *, rtx_insn *, rtx *);
static rtx gen_lowpart_for_combine (machine_mode, rtx);
static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode,
- rtx, rtx *);
+ rtx *, rtx *);
static enum rtx_code simplify_comparison (enum rtx_code, rtx *, rtx *);
static void update_table_tick (rtx);
static void record_value_for_reg (rtx, rtx_insn *, rtx);
@@ -3187,7 +3187,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
compare_code = orig_compare_code = GET_CODE (*cc_use_loc);
if (is_a <scalar_int_mode> (GET_MODE (i2dest), &mode))
compare_code = simplify_compare_const (compare_code, mode,
- op0, &op1);
+ &op0, &op1);
target_canonicalize_comparison (&compare_code, &op0, &op1, 1);
}
@@ -11800,13 +11800,14 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
(CODE OP0 const0_rtx) form.
The result is a possibly different comparison code to use.
- *POP1 may be updated. */
+ *POP0 and *POP1 may be updated. */
static enum rtx_code
simplify_compare_const (enum rtx_code code, machine_mode mode,
- rtx op0, rtx *pop1)
+ rtx *pop0, rtx *pop1)
{
scalar_int_mode int_mode;
+ rtx op0 = *pop0;
HOST_WIDE_INT const_op = INTVAL (*pop1);
/* Get the constant we are comparing against and turn off all bits
@@ -11991,6 +11992,74 @@ simplify_compare_const (enum rtx_code code, machine_mode mode,
break;
}
+ /* Narrow non-symmetric comparison of memory and constant as e.g.
+ x0...x7 <= 0x3fffffffffffffff into x0 <= 0x3f where x0 is the most
+ significant byte. Likewise, transform x0...x7 >= 0x4000000000000000 into
+ x0 >= 0x40. */
+ if ((code == LEU || code == LTU || code == GEU || code == GTU)
+ && is_a <scalar_int_mode> (GET_MODE (op0), &int_mode)
+ && MEM_P (op0)
+ && !MEM_VOLATILE_P (op0)
+ /* The optimization only makes sense for constants which are big enough
+ so that we have a chance to chop off something at all. */
+ && (unsigned HOST_WIDE_INT) const_op > 0xff
+ /* Ensure that we do not overflow during normalization. */
+ && (code != GTU || (unsigned HOST_WIDE_INT) const_op < HOST_WIDE_INT_M1U))
+ {
+ unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
+ enum rtx_code adjusted_code;
+
+ /* Normalize code to either LEU or GEU. */
+ if (code == LTU)
+ {
+ --n;
+ adjusted_code = LEU;
+ }
+ else if (code == GTU)
+ {
+ ++n;
+ adjusted_code = GEU;
+ }
+ else
+ adjusted_code = code;
+
+ scalar_int_mode narrow_mode_iter;
+ FOR_EACH_MODE_UNTIL (narrow_mode_iter, int_mode)
+ {
+ unsigned nbits = GET_MODE_PRECISION (int_mode)
+ - GET_MODE_PRECISION (narrow_mode_iter);
+ unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << nbits) - 1;
+ unsigned HOST_WIDE_INT lower_bits = n & mask;
+ if ((adjusted_code == LEU && lower_bits == mask)
+ || (adjusted_code == GEU && lower_bits == 0))
+ {
+ n >>= nbits;
+ break;
+ }
+ }
+
+ if (narrow_mode_iter < int_mode)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (
+ dump_file, "narrow comparison from mode %s to %s: (MEM %s "
+ HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
+ HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
+ GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
+ (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME (adjusted_code),
+ n);
+ }
+ poly_int64 offset = (BYTES_BIG_ENDIAN
+ ? 0
+ : (GET_MODE_SIZE (int_mode)
+ - GET_MODE_SIZE (narrow_mode_iter)));
+ *pop0 = adjust_address_nv (op0, narrow_mode_iter, offset);
+ *pop1 = GEN_INT (n);
+ return adjusted_code;
+ }
+ }
+
*pop1 = GEN_INT (const_op);
return code;
}
@@ -12183,7 +12252,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
/* Try to simplify the compare to constant, possibly changing the
comparison op, and/or changing op1 to zero. */
- code = simplify_compare_const (code, raw_mode, op0, &op1);
+ code = simplify_compare_const (code, raw_mode, &op0, &op1);
const_op = INTVAL (op1);
/* Compute some predicates to simplify code below. */
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-1.c b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
new file mode 100644
index 0000000..263ad98
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_1byte_a (uint64_t *x)
+{
+ return *x <= 0x3fffffffffffffff;
+}
+
+int
+le_1byte_b (uint64_t *x)
+{
+ return *x < 0x4000000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-2.c b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
new file mode 100644
index 0000000..a7cc534
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_1byte_a (uint64_t *x)
+{
+ return *x > 0x3fffffffffffffff;
+}
+
+int
+ge_1byte_b (uint64_t *x)
+{
+ return *x >= 0x4000000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-3.c b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
new file mode 100644
index 0000000..06f80bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_2bytes_a (uint64_t *x)
+{
+ return *x <= 0x3ffdffffffffffff;
+}
+
+int
+le_2bytes_b (uint64_t *x)
+{
+ return *x < 0x3ffe000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-4.c b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
new file mode 100644
index 0000000..407999a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to HI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_2bytes_a (uint64_t *x)
+{
+ return *x > 0x400cffffffffffff;
+}
+
+int
+ge_2bytes_b (uint64_t *x)
+{
+ return *x >= 0x400d000000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-5.c b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
new file mode 100644
index 0000000..e16773f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+le_4bytes_a (uint64_t *x)
+{
+ return *x <= 0x3ffffdffffffffff;
+}
+
+int
+le_4bytes_b (uint64_t *x)
+{
+ return *x < 0x3ffffe0000000000;
+}
diff --git a/gcc/testsuite/gcc.dg/cmp-mem-const-6.c b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
new file mode 100644
index 0000000..8f53b56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cmp-mem-const-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -fdump-rtl-combine-details" } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to SI" "combine" } } */
+
+typedef __UINT64_TYPE__ uint64_t;
+
+int
+ge_4bytes_a (uint64_t *x)
+{
+ return *x > 0x4000cfffffffffff;
+}
+
+int
+ge_4bytes_b (uint64_t *x)
+{
+ return *x >= 0x4000d00000000000;
+}
diff --git a/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
new file mode 100644
index 0000000..309aafb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/cmp-mem-const-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O1 -march=z13 -mzarch -fdump-rtl-combine-details" } */
+/* { dg-final { scan-assembler-not {\tclc\t} } } */
+/* { dg-final { scan-rtl-dump "narrow comparison from mode DI to QI" "combine" } } */
+
+struct s
+{
+ long a;
+ unsigned b : 1;
+ unsigned c : 1;
+};
+
+int foo (struct s *x)
+{
+ /* Expression
+ x->b || x->c
+ is transformed into
+ _1 = BIT_FIELD_REF <*x_4(D), 64, 64>;
+ _2 = _1 > 0x3FFFFFFFFFFFFFFF;
+ where the constant may materialize in the literal pool and an 8 byte CLC
+ may be emitted. Ensure this is not the case.
+ */
+ return x->b || x->c;
+}