aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-ssa-sccvn.c
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2020-02-13 10:04:11 +0100
committerJakub Jelinek <jakub@redhat.com>2020-02-13 10:04:11 +0100
commit8aba425f4ebc5e2c054776d3cdddf13f7c1918f8 (patch)
treeb99f04913cac6d23c8d1bccea47a2a03a9d44c98 /gcc/tree-ssa-sccvn.c
parent8ea884b85e338d09b14e6a54043c53ae0c1b1fe9 (diff)
downloadgcc-8aba425f4ebc5e2c054776d3cdddf13f7c1918f8.zip
gcc-8aba425f4ebc5e2c054776d3cdddf13f7c1918f8.tar.gz
gcc-8aba425f4ebc5e2c054776d3cdddf13f7c1918f8.tar.bz2
sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582]
The following patch is the first step towards fixing PR93582. vn_reference_lookup_3 right now punts on anything that isn't byte aligned, so to be able to look up a constant bitfield store, one needs to use the exact same COMPONENT_REF, otherwise it isn't found. This patch lifts that restriction if the bits to be loaded are covered by a single store of a constant (it keeps the restriction so far for the multiple-store case — that can be tweaked incrementally, but I think for bisection etc. it is worth doing one step at a time). 2020-02-13 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/93582 * fold-const.h (shift_bytes_in_array_left, shift_bytes_in_array_right): Declare. * fold-const.c (shift_bytes_in_array_left, shift_bytes_in_array_right): New functions, moved from gimple-ssa-store-merging.c, no longer static. * gimple-ssa-store-merging.c (shift_bytes_in_array): Move to fold-const.c and rename to shift_bytes_in_array_left. (shift_bytes_in_array_right): Move to fold-const.c. (encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of shift_bytes_in_array. (verify_shift_bytes_in_array): Rename to ... (verify_shift_bytes_in_array_left): ... this. Use shift_bytes_in_array_left instead of shift_bytes_in_array. (store_merging_c_tests): Call verify_shift_bytes_in_array_left instead of verify_shift_bytes_in_array. * tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr / native_interpret_expr where the store covers all needed bits, punt on PDP-endian, otherwise allow all involved offsets and sizes not to be byte-aligned. * gcc.dg/tree-ssa/pr93582-1.c: New test. * gcc.dg/tree-ssa/pr93582-2.c: New test. * gcc.dg/tree-ssa/pr93582-3.c: New test.
Diffstat (limited to 'gcc/tree-ssa-sccvn.c')
-rw-r--r--gcc/tree-ssa-sccvn.c90
1 files changed, 68 insertions, 22 deletions
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 33cd12b..30518ab 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& is_gimple_reg_type (vr->type)
&& !contains_storage_order_barrier_p (vr->operands)
&& gimple_assign_single_p (def_stmt)
- && CHAR_BIT == 8 && BITS_PER_UNIT == 8
+ && CHAR_BIT == 8
+ && BITS_PER_UNIT == 8
+ && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
/* native_encode and native_decode operate on arrays of bytes
and so fundamentally need a compile-time size and offset. */
&& maxsize.is_constant (&maxsizei)
- && maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti)
- && offseti % BITS_PER_UNIT == 0
&& (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
|| (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
&& is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
@@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& !reverse
&& !storage_order_barrier_p (lhs)
&& known_eq (maxsize2, size2)
- && multiple_p (size2, BITS_PER_UNIT)
- && multiple_p (offset2, BITS_PER_UNIT)
&& adjust_offsets_for_equal_base_address (base, &offset,
base2, &offset2)
&& offset.is_constant (&offseti)
@@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& known_subrange_p (offseti, maxsizei, offset2, size2))
{
/* We support up to 512-bit values (for V8DFmode). */
- unsigned char buffer[64];
+ unsigned char buffer[65];
int len;
tree rhs = gimple_assign_rhs1 (def_stmt);
if (TREE_CODE (rhs) == SSA_NAME)
rhs = SSA_VAL (rhs);
- unsigned pad = 0;
- if (BYTES_BIG_ENDIAN
- && is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
- {
- /* On big-endian the padding is at the 'front' so
- just skip the initial bytes. */
- fixed_size_mode mode
- = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
- pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
- }
len = native_encode_expr (rhs,
- buffer, sizeof (buffer),
- ((offseti - offset2i) / BITS_PER_UNIT
- + pad));
+ buffer, sizeof (buffer) - 1,
+ (offseti - offset2i) / BITS_PER_UNIT);
if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
{
tree type = vr->type;
+ unsigned char *buf = buffer;
+ unsigned int amnt = 0;
/* Make sure to interpret in a type that has a range
covering the whole access size. */
if (INTEGRAL_TYPE_P (vr->type)
&& maxsizei != TYPE_PRECISION (vr->type))
type = build_nonstandard_integer_type (maxsizei,
TYPE_UNSIGNED (type));
- tree val = native_interpret_expr (type, buffer,
- maxsizei / BITS_PER_UNIT);
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* For big-endian native_encode_expr stored the rhs
+ such that the LSB of it is the LSB of buffer[len - 1].
+ That bit is stored into memory at position
+ offset2 + size2 - 1, i.e. in byte
+ base + (offset2 + size2 - 1) / BITS_PER_UNIT.
+ E.g. for offset2 1 and size2 14, rhs -1 and memory
+ previously cleared that is:
+ 0 1
+ 01111111|11111110
+ Now, if we want to extract offset 2 and size 12 from
+ it using native_interpret_expr (which actually works
+ for integral bitfield types in terms of byte size of
+ the mode), the native_encode_expr stored the value
+ into buffer as
+ XX111111|11111111
+ and returned len 2 (the X bits are outside of
+ precision).
+ Let sz be maxsize / BITS_PER_UNIT if not extracting
+ a bitfield, and GET_MODE_SIZE otherwise.
+ We need to align the LSB of the value we want to
+ extract as the LSB of buf[sz - 1].
+ The LSB from memory we need to read is at position
+ offset + maxsize - 1. */
+ HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
+ if (INTEGRAL_TYPE_P (type))
+ sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+ amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+ - offseti - maxsizei) % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_right (buffer, len, amnt);
+ amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+ - offseti - maxsizei - amnt) / BITS_PER_UNIT;
+ if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
+ len = 0;
+ else
+ {
+ buf = buffer + len - sz - amnt;
+ len -= (buf - buffer);
+ }
+ }
+ else
+ {
+ amnt = ((unsigned HOST_WIDE_INT) offset2i
+ - offseti) % BITS_PER_UNIT;
+ if (amnt)
+ {
+ buffer[len] = 0;
+ shift_bytes_in_array_left (buffer, len + 1, amnt);
+ buf = buffer + 1;
+ }
+ }
+ tree val = native_interpret_expr (type, buf, len);
/* If we chop off bits because the types precision doesn't
match the memory access size this is ok when optimizing
reads but not when called from the DSE code during
@@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
return data->finish (get_alias_set (lhs), val);
}
}
- else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+ else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
+ size2i)
+ && maxsizei % BITS_PER_UNIT == 0
+ && offseti % BITS_PER_UNIT == 0
+ && size2i % BITS_PER_UNIT == 0
+ && offset2i % BITS_PER_UNIT == 0)
{
pd_data pd;
tree rhs = gimple_assign_rhs1 (def_stmt);