aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexandre Oliva <oliva@adacore.com>2025-01-10 09:32:05 -0300
committerAlexandre Oliva <oliva@gnu.org>2025-01-10 09:32:05 -0300
commit38401c58f4aae31fd29a16607e9018cb1f66c3ed (patch)
treef91dfb138e722492b9ccaa17311a6079fe6bc20a
parentd019ab4f115caab48316c185c007765719e93052 (diff)
downloadgcc-38401c58f4aae31fd29a16607e9018cb1f66c3ed.zip
gcc-38401c58f4aae31fd29a16607e9018cb1f66c3ed.tar.gz
gcc-38401c58f4aae31fd29a16607e9018cb1f66c3ed.tar.bz2
ifcombine field-merge: improve handling of dwords
On 32-bit hosts, data types with 64-bit alignment aren't getting treated as desired by ifcombine field-merging: we limit the choice of modes at BITS_PER_WORD sizes, but when deciding the boundary for a split, we'd limit the choice only by the alignment, so we wouldn't even consider a split at an odd 32-bit boundary. Fix that by limiting the boundary choice by word choice as well. Now, this would still leave misaligned 64-bit fields in 64-bit-aligned data structures unhandled by ifcombine on 32-bit hosts. We already need to loading them as double words, and if they're not byte-aligned, the code gets really ugly, but ifcombine could improve it if it allows double-word loads as a last resort. I've added that. for gcc/ChangeLog * gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit boundary choice by word size as well. Try aligned double-word loads as a last resort. for gcc/testsuite/ChangeLog * gcc.dg/field-merge-17.c: New.
-rw-r--r--gcc/gimple-fold.cc30
-rw-r--r--gcc/testsuite/gcc.dg/field-merge-17.c46
2 files changed, 73 insertions, 3 deletions
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 0402c76..c8a726e 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8381,16 +8381,40 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type,
{
/* Consider the possibility of recombining loads if any of the
fields straddles across an alignment boundary, so that either
- part can be loaded along with the other field. */
+ part can be loaded along with the other field. Since we
+ limit access modes to BITS_PER_WORD, don't exceed that,
+ otherwise on a 32-bit host and a 64-bit-aligned data
+ structure, we'll fail the above for a field that straddles
+ across two words, and would fail here for not even trying to
+ split it at between 32-bit words. */
HOST_WIDE_INT boundary = compute_split_boundary_from_align
- (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+ (MIN (ll_align, BITS_PER_WORD),
+ ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
if (boundary < 0
|| !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region,
ll_align, BITS_PER_WORD, volatilep, &lnmode)
|| !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region,
ll_align, BITS_PER_WORD, volatilep, &lnmode2))
- return 0;
+ {
+ if (ll_align <= BITS_PER_WORD)
+ return 0;
+
+ /* As a last resort, try double-word access modes. This
+ enables us to deal with misaligned double-word fields
+ that straddle across 3 separate words. */
+ boundary = compute_split_boundary_from_align
+ (MIN (ll_align, 2 * BITS_PER_WORD),
+ ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+ if (boundary < 0
+ || !get_best_mode (boundary - first_bit, first_bit,
+ 0, ll_end_region, ll_align, 2 * BITS_PER_WORD,
+ volatilep, &lnmode)
+ || !get_best_mode (end_bit - boundary, boundary,
+ 0, ll_end_region, ll_align, 2 * BITS_PER_WORD,
+ volatilep, &lnmode2))
+ return 0;
+ }
/* If we can't have a single load, but can with two, figure out whether
the two compares can be separated, i.e., whether the entirety of the
diff --git a/gcc/testsuite/gcc.dg/field-merge-17.c b/gcc/testsuite/gcc.dg/field-merge-17.c
new file mode 100644
index 0000000..06c8ec1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-17.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ifcombine-details" } */
+
+/* Check that we can optimize misaligned double-words. */
+
+struct s {
+ short a;
+ long long b;
+ int c;
+ long long d;
+ short e;
+} __attribute__ ((packed, aligned (8)));
+
+struct s p = { 0, 0, 0, 0, 0 };
+
+__attribute__ ((__noinline__, __noipa__, __noclone__))
+int fp ()
+{
+ if (p.a
+ || p.b
+ || p.c
+ || p.d
+ || p.e)
+ return 1;
+ else
+ return -1;
+}
+
+int main () {
+ /* Unlikely, but play safe. */
+ if (sizeof (long long) == sizeof (short))
+ return 0;
+ if (fp () > 0)
+ __builtin_abort ();
+ unsigned char *pc = (unsigned char *)&p;
+ for (int i = 0; i < sizeof (p); i++)
+ {
+ pc[i] = 1;
+ if (fp () < 0)
+ __builtin_abort ();
+ pc[i] = 0;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimizing" 4 "ifcombine" } } */