diff options
author | Alexandre Oliva <oliva@adacore.com> | 2025-01-10 09:32:05 -0300 |
---|---|---|
committer | Alexandre Oliva <oliva@gnu.org> | 2025-01-10 09:32:05 -0300 |
commit | 38401c58f4aae31fd29a16607e9018cb1f66c3ed (patch) | |
tree | f91dfb138e722492b9ccaa17311a6079fe6bc20a /gcc | |
parent | d019ab4f115caab48316c185c007765719e93052 (diff) | |
download | gcc-38401c58f4aae31fd29a16607e9018cb1f66c3ed.zip gcc-38401c58f4aae31fd29a16607e9018cb1f66c3ed.tar.gz gcc-38401c58f4aae31fd29a16607e9018cb1f66c3ed.tar.bz2 |
ifcombine field-merge: improve handling of dwords
On 32-bit hosts, data types with 64-bit alignment aren't getting
treated as desired by ifcombine field-merging: we limit the choice of
modes to BITS_PER_WORD sizes, but when deciding the boundary for a
split, we'd limit the choice only by the alignment, so we wouldn't
even consider a split at an odd 32-bit boundary. Fix that by limiting
the boundary choice by word size as well.
Now, this would still leave misaligned 64-bit fields in 64-bit-aligned
data structures unhandled by ifcombine on 32-bit hosts. We already
need to load them as double words, and if they're not byte-aligned,
the code gets really ugly, but ifcombine could improve it if it allows
double-word loads as a last resort. I've added that.
for gcc/ChangeLog
* gimple-fold.cc (fold_truth_andor_for_ifcombine): Limit
boundary choice by word size as well. Try aligned double-word
loads as a last resort.
for gcc/testsuite/ChangeLog
* gcc.dg/field-merge-17.c: New.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/gimple-fold.cc | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/field-merge-17.c | 46 |
2 files changed, 73 insertions, 3 deletions
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 0402c76..c8a726e 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -8381,16 +8381,40 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, { /* Consider the possibility of recombining loads if any of the fields straddles across an alignment boundary, so that either - part can be loaded along with the other field. */ + part can be loaded along with the other field. Since we + limit access modes to BITS_PER_WORD, don't exceed that, + otherwise on a 32-bit host and a 64-bit-aligned data + structure, we'll fail the above for a field that straddles + across two words, and would fail here for not even trying to + split it at between 32-bit words. */ HOST_WIDE_INT boundary = compute_split_boundary_from_align - (ll_align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize); + (MIN (ll_align, BITS_PER_WORD), + ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize); if (boundary < 0 || !get_best_mode (boundary - first_bit, first_bit, 0, ll_end_region, ll_align, BITS_PER_WORD, volatilep, &lnmode) || !get_best_mode (end_bit - boundary, boundary, 0, ll_end_region, ll_align, BITS_PER_WORD, volatilep, &lnmode2)) - return 0; + { + if (ll_align <= BITS_PER_WORD) + return 0; + + /* As a last resort, try double-word access modes. This + enables us to deal with misaligned double-word fields + that straddle across 3 separate words. 
*/ + boundary = compute_split_boundary_from_align + (MIN (ll_align, 2 * BITS_PER_WORD), + ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize); + if (boundary < 0 + || !get_best_mode (boundary - first_bit, first_bit, + 0, ll_end_region, ll_align, 2 * BITS_PER_WORD, + volatilep, &lnmode) + || !get_best_mode (end_bit - boundary, boundary, + 0, ll_end_region, ll_align, 2 * BITS_PER_WORD, + volatilep, &lnmode2)) + return 0; + } /* If we can't have a single load, but can with two, figure out whether the two compares can be separated, i.e., whether the entirety of the diff --git a/gcc/testsuite/gcc.dg/field-merge-17.c b/gcc/testsuite/gcc.dg/field-merge-17.c new file mode 100644 index 0000000..06c8ec1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-17.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O -fdump-tree-ifcombine-details" } */ + +/* Check that we can optimize misaligned double-words. */ + +struct s { + short a; + long long b; + int c; + long long d; + short e; +} __attribute__ ((packed, aligned (8))); + +struct s p = { 0, 0, 0, 0, 0 }; + +__attribute__ ((__noinline__, __noipa__, __noclone__)) +int fp () +{ + if (p.a + || p.b + || p.c + || p.d + || p.e) + return 1; + else + return -1; +} + +int main () { + /* Unlikely, but play safe. */ + if (sizeof (long long) == sizeof (short)) + return 0; + if (fp () > 0) + __builtin_abort (); + unsigned char *pc = (unsigned char *)&p; + for (int i = 0; i < sizeof (p); i++) + { + pc[i] = 1; + if (fp () < 0) + __builtin_abort (); + pc[i] = 0; + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "optimizing" 4 "ifcombine" } } */ |