Diffstat (limited to 'gcc/avoid-store-forwarding.cc')
-rw-r--r--  gcc/avoid-store-forwarding.cc  170
1 file changed, 135 insertions, 35 deletions
diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 34a7bba..78ed736 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -80,12 +80,12 @@ public:
{}
/* opt_pass methods: */
- virtual bool gate (function *)
+ virtual bool gate (function *) final override
{
return flag_avoid_store_forwarding && optimize >= 1;
}
- virtual unsigned int execute (function *) override;
+ virtual unsigned int execute (function *) final override;
}; // class pass_rtl_avoid_store_forwarding
/* Handler for finding and avoiding store forwardings. */
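The gate above ties the pass to its option: it only runs when the flag behind
flag_avoid_store_forwarding is enabled at -O1 or higher. For instance, a
compilation such as

    gcc -O2 -favoid-store-forwarding file.c

exercises it, assuming the flag is not already on by default for the target.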
@@ -119,17 +119,6 @@ generate_bit_insert_sequence (store_fwd_info *store_info, rtx dest)
unsigned HOST_WIDE_INT bitsize = store_size * BITS_PER_UNIT;
unsigned HOST_WIDE_INT start = store_info->offset * BITS_PER_UNIT;
- /* Adjust START for machines with BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN.
- Given that the bytes will be reversed in this case, we need to
- calculate the starting position from the end of the destination
- register. */
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
- {
- unsigned HOST_WIDE_INT load_mode_bitsize
- = (GET_MODE_BITSIZE (GET_MODE (dest))).to_constant ();
- start = load_mode_bitsize - bitsize - start;
- }
-
rtx mov_reg = store_info->mov_reg;
store_bit_field (dest, bitsize, start, 0, 0, GET_MODE (mov_reg), mov_reg,
false, false);
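For a feel of the arithmetic above (a worked example with made-up values,
assuming BITS_PER_UNIT == 8): a 2-byte store recorded at byte offset 4 gives
bitsize = 2 * 8 = 16 and start = 4 * 8 = 32, so the store_bit_field call
inserts MOV_REG into bits [32, 48) of DEST.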
@@ -156,11 +145,18 @@ is_store_forwarding (rtx store_mem, rtx load_mem, HOST_WIDE_INT *off_val)
poly_int64 load_offset, store_offset;
rtx load_base = strip_offset (XEXP (load_mem, 0), &load_offset);
rtx store_base = strip_offset (XEXP (store_mem, 0), &store_offset);
+ poly_int64 off_diff = store_offset - load_offset;
+
+ HOST_WIDE_INT off_val_tmp = 0;
+ bool is_off_diff_constant = off_diff.is_constant (&off_val_tmp);
+ if (off_val)
+ *off_val = off_val_tmp;
+
return (MEM_SIZE (load_mem).is_constant ()
&& rtx_equal_p (load_base, store_base)
&& known_subrange_p (store_offset, MEM_SIZE (store_mem),
load_offset, MEM_SIZE (load_mem))
- && (store_offset - load_offset).is_constant (off_val));
+ && is_off_diff_constant);
}
/* Given a list of small stores that are forwarded to LOAD_INSN, try to
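The containment test in is_store_forwarding (known_subrange_p on the stripped
offsets) amounts to a plain range check once the sizes are known constants.
A minimal scalar sketch, with a hypothetical helper standing in for the
poly_int-aware predicate:

    /* Hypothetical illustration: does the store's byte range lie entirely
       within the load's byte range, relative to a common base address?  */
    static bool
    store_within_load (long store_off, long store_size,
                       long load_off, long load_size)
    {
      return store_off >= load_off
             && store_off + store_size <= load_off + load_size;
    }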
@@ -176,20 +172,28 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
/* Memory sizes should be constants at this stage. */
HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
- /* If the stores cover all the bytes of the load without overlap then we can
- eliminate the load entirely and use the computed value instead. */
+ /* If the stores cover all the bytes of the load, then we can eliminate
+ the load entirely and use the computed value instead.
+ We can also eliminate stores on addresses that are overwritten
+ by later stores. */
sbitmap forwarded_bytes = sbitmap_alloc (load_size);
bitmap_clear (forwarded_bytes);
unsigned int i;
store_fwd_info* it;
+ auto_vec<store_fwd_info> redundant_stores;
+ auto_vec<int> store_ind_to_remove;
FOR_EACH_VEC_ELT (stores, i, it)
{
HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
- if (bitmap_bit_in_range_p (forwarded_bytes, it->offset,
- it->offset + store_size - 1))
- break;
+ if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
+ it->offset + store_size - 1))
+ {
+ redundant_stores.safe_push (*it);
+ store_ind_to_remove.safe_push (i);
+ continue;
+ }
bitmap_set_range (forwarded_bytes, it->offset, store_size);
}
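The sbitmap loop above tracks which bytes of the load have already been
claimed; a store whose bytes are all claimed is redundant. A self-contained
sketch of the same classification on standard containers (hypothetical types,
and assuming stores are visited so that an overwriting store is seen before
the stores it overwrites, matching the intent of dropping stores overwritten
by later ones):

    #include <vector>

    struct piece { unsigned offset, size; };  /* byte range within the load */

    static void
    classify (const std::vector<piece> &stores, unsigned load_size,
              std::vector<bool> &redundant)
    {
      std::vector<bool> covered (load_size, false);
      redundant.assign (stores.size (), false);
      for (unsigned i = 0; i < stores.size (); i++)
        {
          bool all_covered = true;
          for (unsigned b = 0; b < stores[i].size; b++)
            if (!covered[stores[i].offset + b])
              all_covered = false;
          /* Every byte already claimed: this store is fully overwritten,
             so it is redundant.  */
          redundant[i] = all_covered;
          for (unsigned b = 0; b < stores[i].size; b++)
            covered[stores[i].offset + b] = true;
        }
    }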
@@ -215,6 +219,15 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
fprintf (dump_file, "(Load elimination candidate)\n");
}
+ /* Remove redundant stores from the vector. Although this is quadratic,
+ there doesn't seem to be much point optimizing it. The number of
+ redundant stores is expected to be low and the length of the list is
+ limited by a --param. The dependence checking that we did earlier is
+ also quadratic in the size of this list. */
+ store_ind_to_remove.reverse ();
+ for (int i : store_ind_to_remove)
+ stores.ordered_remove (i);
+
rtx load = single_set (load_insn);
rtx dest;
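The reverse () + ordered_remove () pairing above removes by descending index,
which works because erasing a higher position never shifts the lower positions
still pending. A standalone sketch of the pattern:

    #include <algorithm>
    #include <vector>

    /* Erase V's elements at positions IDX, highest position first so that
       each erase leaves the not-yet-erased positions valid.  */
    static void
    erase_positions (std::vector<int> &v, std::vector<size_t> idx)
    {
      std::sort (idx.rbegin (), idx.rend ());  /* descending order */
      for (size_t pos : idx)
        v.erase (v.begin () + pos);
    }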
@@ -225,6 +238,28 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
int move_to_front = -1;
int total_cost = 0;
+ int base_offset_index = -1;
+
+ /* Find the last store that has the same offset as the load, in case
+ we're eliminating the load. We will try to use it as a base register
+ to avoid bit inserts (see second loop below). We want the last one, as
+ it will be wider and we don't want to overwrite the base register if
+ there are many of them. */
+ if (load_elim)
+ {
+ FOR_EACH_VEC_ELT_REVERSE (stores, i, it)
+ {
+ const bool has_base_offset
+ = known_eq (poly_uint64 (it->offset),
+ subreg_size_lowpart_offset (MEM_SIZE (it->store_mem),
+ load_size));
+ if (has_base_offset)
+ {
+ base_offset_index = i;
+ break;
+ }
+ }
+ }
/* Check if we can emit bit insert instructions for all forwarded stores. */
FOR_EACH_VEC_ELT (stores, i, it)
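A worked instance of the base-offset test above (illustrative numbers, and
assuming the usual subreg lowpart rules): for an 8-byte load and a 4-byte
store, subreg_size_lowpart_offset (4, 8) is byte offset 0 on a little-endian
target and 4 on a big-endian one, so only the store that writes the low part
of the loaded value can serve as the base register.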
@@ -232,16 +267,19 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
rtx_insn *insns = NULL;
- /* If we're eliminating the load then find the store with zero offset
- and use it as the base register to avoid a bit insert if possible. */
- if (load_elim && it->offset == 0)
+ /* If we're eliminating the load, check whether this is the store with the
+ base offset and, if so, use it as the base register to avoid a bit insert
+ if possible. Load elimination is implied by base_offset_index != -1. */
+ if (i == (unsigned) base_offset_index)
{
start_sequence ();
- rtx ext0 = gen_rtx_ZERO_EXTEND (GET_MODE (dest), it->mov_reg);
- if (ext0)
+ rtx base_reg = lowpart_subreg (GET_MODE (dest), it->mov_reg,
+ GET_MODE (it->mov_reg));
+
+ if (base_reg)
{
- rtx_insn *move0 = emit_move_insn (dest, ext0);
+ rtx_insn *move0 = emit_move_insn (dest, base_reg);
if (recog_memoized (move0) >= 0)
{
insns = get_insns ();
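On scalars, taking the base-offset store as the initial value of DEST and
bit-inserting the remaining pieces looks like the following little-endian
sketch (made-up widths, not the pass's RTL):

    #include <cstdint>

    /* Rebuild an 8-byte loaded value from a 4-byte base store at byte
       offset 0 plus a 4-byte store at byte offset 4.  */
    static uint64_t
    compose (uint32_t lo_piece, uint32_t hi_piece)
    {
      uint64_t v = lo_piece;            /* lowpart move: no bit insert  */
      v |= (uint64_t) hi_piece << 32;   /* bit insert at bit 32         */
      return v;
    }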
@@ -346,8 +384,7 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
start_sequence ();
rtx_insn *insn = emit_insn (load_move);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (recog_memoized (insn) < 0)
return false;
@@ -376,6 +413,16 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
print_rtl_single (dump_file, insn);
}
}
+
+ if (redundant_stores.length () > 0)
+ {
+ fprintf (dump_file, "\nRedundant stores that have been removed:\n");
+ FOR_EACH_VEC_ELT (redundant_stores, i, it)
+ {
+ fprintf (dump_file, " ");
+ print_rtl_single (dump_file, it->store_insn);
+ }
+ }
}
stats_sf_avoided++;
@@ -395,6 +442,10 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn,
delete_insn (it->store_insn);
}
+ /* Delete redundant stores. */
+ FOR_EACH_VEC_ELT (redundant_stores, i, it)
+ delete_insn (it->store_insn);
+
df_insn_rescan (load_insn);
if (load_elim)
@@ -412,9 +463,22 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
return;
auto_vec<store_fwd_info, 8> store_exprs;
+ auto_vec<rtx> store_exprs_del;
rtx_insn *insn;
unsigned int insn_cnt = 0;
+ /* Iterate over the basic block's instructions, recording any simple
+ store instructions we find. Upon reaching a load instruction, check
+ whether any of the recorded stores could result in store forwarding,
+ and if so try to reorder the load and store instructions.
+ We skip the transformation when we encounter complex memory operations,
+ instructions that might throw an exception, instruction dependencies,
+ etc. This is done by clearing the vector of detected stores, while
+ keeping the cleared stores in another vector. That way, on reaching
+ a subsequent store that operates on the load's address range, we can
+ check whether any of the cleared stores also operated on that range,
+ as forwarding the new store would then leave an incorrect value in
+ the register that holds the loaded value. */
FOR_BB_INSNS (bb, insn)
{
if (!NONDEBUG_INSN_P (insn))
@@ -427,6 +491,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
if (!set || insn_could_throw_p (insn))
{
+ unsigned int i;
+ store_fwd_info *it;
+ FOR_EACH_VEC_ELT (store_exprs, i, it)
+ store_exprs_del.safe_push (it->store_mem);
store_exprs.truncate (0);
continue;
}
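Schematically, the scan that the comment above describes can be pictured like
this (a sketch with hypothetical types, not the pass's real helpers):

    #include <vector>

    enum class op { SIMPLE_STORE, SIMPLE_LOAD, COMPLEX, OTHER };
    struct insn_rec { op kind; /* operands elided */ };

    static void
    scan_block (const std::vector<insn_rec> &bb)
    {
      std::vector<const insn_rec *> pending, flushed;
      for (const insn_rec &i : bb)
        switch (i.kind)
          {
          case op::SIMPLE_STORE:
            pending.push_back (&i);         /* remember candidate store */
            break;
          case op::SIMPLE_LOAD:
            /* Here the pass checks PENDING for forwardings into the
               load's range, consulting FLUSHED to rule out ranges that
               lost a store earlier.  */
            break;
          case op::COMPLEX:
            /* Can't reason about side effects: clear the candidates but
               remember them for the range check above.  */
            flushed.insert (flushed.end (),
                            pending.begin (), pending.end ());
            pending.clear ();
            break;
          default:
            break;
          }
    }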
@@ -450,6 +518,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
|| (load_mem && (!MEM_SIZE_KNOWN_P (load_mem)
|| !MEM_SIZE (load_mem).is_constant ())))
{
+ unsigned int i;
+ store_fwd_info *it;
+ FOR_EACH_VEC_ELT (store_exprs, i, it)
+ store_exprs_del.safe_push (it->store_mem);
store_exprs.truncate (0);
continue;
}
@@ -501,6 +573,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
it->remove = true;
removed_count++;
remove_rest = true;
+ store_exprs_del.safe_push (it->store_mem);
}
}
}
@@ -540,23 +613,46 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
it->remove = true;
removed_count++;
remove_rest = true;
+ forwardings.truncate (0);
}
else if (is_store_forwarding (store_mem, load_mem, &off_val))
{
+ unsigned int j;
+ rtx *del_it;
+ bool same_range_as_removed = false;
+
+ /* Check if another store in the load's address range has
+ been deleted due to a constraint violation. In this case
+ we can't forward any other stores that operate in this
+ range, as it would lead to a partial update of the register
+ that holds the loaded value. */
+ FOR_EACH_VEC_ELT (store_exprs_del, j, del_it)
+ {
+ rtx del_store_mem = *del_it;
+ same_range_as_removed
+ = is_store_forwarding (del_store_mem, load_mem, NULL);
+ if (same_range_as_removed)
+ break;
+ }
+
/* Check if moving this store after the load is legal. */
bool write_dep = false;
- for (unsigned int j = store_exprs.length () - 1; j != i; j--)
+ if (!same_range_as_removed)
{
- if (!store_exprs[j].forwarded
- && output_dependence (store_mem,
- store_exprs[j].store_mem))
+ unsigned int j = store_exprs.length () - 1;
+ for (; j != i; j--)
{
- write_dep = true;
- break;
+ if (!store_exprs[j].forwarded
+ && output_dependence (store_mem,
+ store_exprs[j].store_mem))
+ {
+ write_dep = true;
+ break;
+ }
}
}
- if (!write_dep)
+ if (!same_range_as_removed && !write_dep)
{
it->forwarded = true;
it->offset = off_val;
@@ -576,6 +672,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
it->remove = true;
removed_count++;
remove_rest = true;
+ forwardings.truncate (0);
}
}
@@ -583,9 +680,12 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb)
process_store_forwarding (forwardings, insn, load_mem);
}
+ /* Bail out if we encounter a memory read/write that is not a
+ simple store/load, as we can't make safe assumptions about
+ its side effects. */
if ((writes_mem && !is_simple_store)
|| (reads_mem && !is_simple_load))
- store_exprs.truncate (0);
+ return;
if (removed_count)
{