about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/doc/invoke.texi 3
-rw-r--r-- gcc/tree-ssa-loop-prefetch.c 29
3 files changed, 22 insertions, 16 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a0ba83a..8d01aa4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
2010-05-17 Changpeng Fang <changpeng.fang@amd.com>
+ * doc/invoke.texi: Update documentation for min-insn-to-prefetch-ratio.
+ * tree-ssa-loop-prefetch.c (is_loop_prefetching_profitable): Also apply
+ the insn to prefetch ratio heuristic to loops with known trip count.
+
+2010-05-17 Changpeng Fang <changpeng.fang@amd.com>
+
* tree-ssa-loop-prefetch.c (PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO): New.
(schedule_prefetches): Do not generate a prefetch if the unroll factor
is far from what is required by the prefetch.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e994902..bd29d24 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -8518,8 +8518,7 @@ The size of L2 cache, in kilobytes.
@item min-insn-to-prefetch-ratio
The minimum ratio between the number of instructions and the
-number of prefetches to enable prefetching in a loop with an
-unknown trip count.
+number of prefetches to enable prefetching in a loop.
@item prefetch-min-insn-to-mem-ratio
The minimum ratio between the number of instructions and the
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 4889604..4d85f54 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -1603,17 +1603,9 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter,
return false;
}
- /* Profitability of prefetching is highly dependent on the trip count.
- For a given AHEAD distance, the first AHEAD iterations do not benefit
- from prefetching, and the last AHEAD iterations execute useless
- prefetches. So, if the trip count is not large enough relative to AHEAD,
- prefetching may cause serious performance degradation. To avoid this
- problem when the trip count is not known at compile time, we
- conservatively skip loops with high prefetching costs. For now, only
- the I-cache cost is considered. The relative I-cache cost is estimated
- by taking the ratio between the number of prefetches and the total
- number of instructions. Since we are using integer arithmetic, we
- compute the reciprocal of this ratio.
+ /* Prefetching most likely causes performance degradation when the
+ instruction-to-prefetch ratio is too small, because too many prefetch
+ instructions in a loop may reduce I-cache performance.
(unroll_factor * ninsns) is used to estimate the number of instructions in
the unrolled loop. This implementation is a bit simplistic -- the number
of issued prefetch instructions is also affected by unrolling. So,
@@ -1623,12 +1615,21 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter,
original loop * unroll_factor (at least the induction variable increases
and the exit branches will get eliminated), so it might be better to use
tree_estimate_loop_size + estimated_unrolled_size. */
- if (est_niter < 0)
+ insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count;
+ if (insn_to_prefetch_ratio < MIN_INSN_TO_PREFETCH_RATIO)
{
- insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count;
- return insn_to_prefetch_ratio >= MIN_INSN_TO_PREFETCH_RATIO;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Not prefetching -- instruction to prefetch ratio (%d) too small\n",
+ insn_to_prefetch_ratio);
+ return false;
}
+ /* No further estimation is possible when the trip count is unknown, so
+ assume prefetching is profitable. ??? This may be too aggressive. */
+ if (est_niter < 0)
+ return true;
+
if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead))
{
if (dump_file && (dump_flags & TDF_DETAILS))