| author | Changpeng Fang <changpeng.fang@amd.com> | 2010-07-02 16:34:38 +0000 |
|---|---|---|
| committer | Sebastian Pop <spop@gcc.gnu.org> | 2010-07-02 16:34:38 +0000 |
| commit | 14e444c392d7ab69d0dcd299df8abbac067d07dc | |
| tree | 4fae2c020fec2caf58422c7e0546771c17e6f8ec /gcc/tree-ssa-loop-prefetch.c | |
| parent | a245c04bccad9eea35cf21def68ae812fd29a953 | |
Reduce the cost in miss rate computation.
2010-07-02 Changpeng Fang <changpeng.fang@amd.com>
* tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to
is_miss_rate_acceptable. Pull total_positions computation
out of the loops. Early return if miss_positions exceeds
the acceptable threshold.
* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call
is_miss_rate_acceptable after renaming of compute_miss_rate.
From-SVN: r161728
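To make the ChangeLog's "early return" concrete, here is a small worked example of the threshold arithmetic. The numbers below are hypothetical, and ACCEPTABLE_MISS_RATE is assumed to be 50 per mille for the sake of the sketch; this is an illustration of the budget computation, not GCC code:

```c
#include <stdio.h>

/* Hypothetical numbers, only to illustrate the precomputed miss budget;
   ACCEPTABLE_MISS_RATE is assumed to be 50 (per mille, i.e. 5%).  */
#define ACCEPTABLE_MISS_RATE 50

int
main (void)
{
  unsigned cache_line_size = 64;  /* bytes */
  int align_unit = 8;             /* bytes, i.e. TYPE_ALIGN / 8 */
  unsigned distinct_iters = 8;

  /* The scan visits (cache_line_size / align_unit) * distinct_iters
     positions, so the total is known before the loops run...  */
  int total_positions = (cache_line_size / align_unit) * distinct_iters;

  /* ...and the largest tolerable number of conflicting positions can be
     precomputed.  Counting can stop at the first position beyond it.  */
  int max_allowed_miss_positions
    = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;

  printf ("total positions: %d, allowed misses: %d\n",
          total_positions, max_allowed_miss_positions);  /* 64 and 3 */
  return 0;
}
```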
Diffstat (limited to 'gcc/tree-ssa-loop-prefetch.c')
-rw-r--r-- | gcc/tree-ssa-loop-prefetch.c | 39 |
1 files changed, 20 insertions, 19 deletions
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 934b49c..c3e90d2 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -640,27 +640,29 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
 /* Given a CACHE_LINE_SIZE and two inductive memory references
    with a common STEP greater than CACHE_LINE_SIZE and an address
    difference DELTA, compute the probability that they will fall
-   in different cache lines.  DISTINCT_ITERS is the number of
-   distinct iterations after which the pattern repeats itself.
+   in different cache lines.  Return true if the computed miss rate
+   is not greater than the ACCEPTABLE_MISS_RATE.  DISTINCT_ITERS is the
+   number of distinct iterations after which the pattern repeats itself.
    ALIGN_UNIT is the unit of alignment in bytes.  */

-static int
-compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
+static bool
+is_miss_rate_acceptable (unsigned HOST_WIDE_INT cache_line_size,
                    HOST_WIDE_INT step, HOST_WIDE_INT delta,
                    unsigned HOST_WIDE_INT distinct_iters,
                    int align_unit)
 {
   unsigned align, iter;
-  int total_positions, miss_positions, miss_rate;
+  int total_positions, miss_positions, max_allowed_miss_positions;
   int address1, address2, cache_line1, cache_line2;

   /* It always misses if delta is greater than or equal to the cache
      line size.  */
-  if (delta >= cache_line_size)
-    return 1000;
+  if (delta >= (HOST_WIDE_INT) cache_line_size)
+    return false;

-  total_positions = 0;
   miss_positions = 0;
+  total_positions = (cache_line_size / align_unit) * distinct_iters;
+  max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;

   /* Iterate through all possible alignments of the first
      memory reference within its cache line.  */
@@ -673,12 +675,14 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
         address2 = address1 + delta;
         cache_line1 = address1 / cache_line_size;
         cache_line2 = address2 / cache_line_size;
-        total_positions += 1;
         if (cache_line1 != cache_line2)
-          miss_positions += 1;
+          {
+            miss_positions += 1;
+            if (miss_positions > max_allowed_miss_positions)
+              return false;
+          }
       }
-  miss_rate = 1000 * miss_positions / total_positions;
-  return miss_rate;
+  return true;
 }

 /* Prune the prefetch candidate REF using the reuse with BY.
@@ -694,7 +698,6 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
   HOST_WIDE_INT delta = delta_b - delta_r;
   HOST_WIDE_INT hit_from;
   unsigned HOST_WIDE_INT prefetch_before, prefetch_block;
-  int miss_rate;
   HOST_WIDE_INT reduced_step;
   unsigned HOST_WIDE_INT reduced_prefetch_block;
   tree ref_type;
@@ -793,9 +796,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
       delta %= step;
       ref_type = TREE_TYPE (ref->mem);
       align_unit = TYPE_ALIGN (ref_type) / 8;
-      miss_rate = compute_miss_rate(prefetch_block, step, delta,
-                                    reduced_prefetch_block, align_unit);
-      if (miss_rate <= ACCEPTABLE_MISS_RATE)
+      if (is_miss_rate_acceptable (prefetch_block, step, delta,
+                                   reduced_prefetch_block, align_unit))
         {
           /* Do not reduce prefetch_before if we meet beyond cache size.  */
           if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK)
@@ -809,9 +811,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
       /* Try also the following iteration.  */
       prefetch_before++;
       delta = step - delta;
-      miss_rate = compute_miss_rate(prefetch_block, step, delta,
-                                    reduced_prefetch_block, align_unit);
-      if (miss_rate <= ACCEPTABLE_MISS_RATE)
+      if (is_miss_rate_acceptable (prefetch_block, step, delta,
+                                   reduced_prefetch_block, align_unit))
         {
           if (prefetch_before < ref->prefetch_before)
             ref->prefetch_before = prefetch_before;
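For experimenting with the patched logic outside of GCC, the following is a self-contained sketch of the new function: plain C types stand in for HOST_WIDE_INT, ACCEPTABLE_MISS_RATE is again assumed to be 50 per mille, and the parameters in main() are made up. It mirrors the counting loop and early exit shown in the diff, but it is an illustration under those assumptions, not the GCC source itself.

```c
#include <stdbool.h>
#include <stdio.h>

#define ACCEPTABLE_MISS_RATE 50  /* per mille; assumed value for this sketch */

/* Count (alignment, iteration) positions where the two references fall
   into different cache lines; give up as soon as the count exceeds the
   budget implied by ACCEPTABLE_MISS_RATE.  */
static bool
is_miss_rate_acceptable_sketch (unsigned cache_line_size, long step,
                                long delta, unsigned distinct_iters,
                                int align_unit)
{
  unsigned align, iter;
  int miss_positions, total_positions, max_allowed_miss_positions;

  /* A delta of a full cache line or more always misses.  */
  if (delta >= (long) cache_line_size)
    return false;

  miss_positions = 0;
  total_positions = (cache_line_size / align_unit) * distinct_iters;
  max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;

  /* Try every alignment of the first reference within its cache line,
     and every distinct iteration of the access pattern.  */
  for (align = 0; align < cache_line_size; align += align_unit)
    for (iter = 0; iter < distinct_iters; iter++)
      {
        long address1 = align + step * iter;
        long address2 = address1 + delta;
        if (address1 / cache_line_size != address2 / cache_line_size
            && ++miss_positions > max_allowed_miss_positions)
          return false;  /* early exit: miss rate already too high */
      }
  return true;
}

int
main (void)
{
  /* 64-byte cache lines, step 128, delta 8, 2 distinct iterations,
     8-byte alignment unit -- made-up values for demonstration.  */
  printf ("acceptable: %s\n",
          is_miss_rate_acceptable_sketch (64, 128, 8, 2, 8) ? "yes" : "no");
  return 0;
}
```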