aboutsummaryrefslogtreecommitdiff
path: root/gas
diff options
context:
space:
mode:
authorMax Filippov <jcmvbkbc@gmail.com>2017-11-14 15:16:08 -0800
committerMax Filippov <jcmvbkbc@gmail.com>2017-11-27 15:15:46 -0800
commit148d6384291720bcaaa062badf1179b6215f6da3 (patch)
treed49196e0234f9e070713837b8cc400ee6743c274 /gas
parent76a493ab99d9276180db6e791f95d1d6d86d2954 (diff)
downloadfsf-binutils-gdb-148d6384291720bcaaa062badf1179b6215f6da3.zip
fsf-binutils-gdb-148d6384291720bcaaa062badf1179b6215f6da3.tar.gz
fsf-binutils-gdb-148d6384291720bcaaa062badf1179b6215f6da3.tar.bz2
gas: xtensa: implement trampoline coalescing
There is a recurring pattern in assembly files generated by a compiler where a lot of jumps in a function are going to the same place. When these jumps are relaxed with trampolines the assembler generates a separate jump thread from each source. Create an index of trampoline jump targets for each segment and see if a jump being relaxed goes to a location from that index, in which case replace its target with a location of existing trampoline jump that results in the shortest path to the original target. gas/ 2017-11-27 Max Filippov <jcmvbkbc@gmail.com> * config/tc-xtensa.c (trampoline_chain_entry, trampoline_chain) (trampoline_chain_index): New structures. (trampoline_index): Add chain_index field. (xg_order_trampoline_chain_entry, xg_sort_trampoline_chain) (xg_find_chain_entry, xg_get_best_chain_entry) (xg_order_trampoline_chain, xg_get_trampoline_chain) (xg_find_best_eq_target, xg_add_location_to_chain) (xg_create_trampoline_chain, xg_get_single_symbol_slot): New functions. (xg_relax_fixups): Call xg_find_best_eq_target to adjust jump target to point to an existing jump. Call xg_create_trampoline_chain to create new jump target. Call xg_add_location_to_chain to add newly created trampoline jump to the corresponding chain. (add_jump_to_trampoline): Extract loop searching for a single slot with a symbol into a separate function, replace that code with a call to that function. (relax_frag_immed): Call xg_find_best_eq_target to adjust jump target to point to an existing jump. * testsuite/gas/xtensa/all.exp: Add trampoline-2 test. * testsuite/gas/xtensa/trampoline.d: Adjust absolute addresses as many duplicate trampoline chains are now coalesced. * testsuite/gas/xtensa/trampoline.s: Add _nop so that objdump stays in sync with instruction stream. * testsuite/gas/xtensa/trampoline-2.l: New test result file. * testsuite/gas/xtensa/trampoline-2.s: New test source file.
Diffstat (limited to 'gas')
-rw-r--r--gas/ChangeLog29
-rw-r--r--gas/config/tc-xtensa.c286
-rw-r--r--gas/testsuite/gas/xtensa/all.exp1
-rw-r--r--gas/testsuite/gas/xtensa/trampoline-2.l1
-rw-r--r--gas/testsuite/gas/xtensa/trampoline-2.s20
-rw-r--r--gas/testsuite/gas/xtensa/trampoline.d31
-rw-r--r--gas/testsuite/gas/xtensa/trampoline.s1
7 files changed, 341 insertions, 28 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index c129941..92d0c36 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,5 +1,34 @@
2017-11-27 Max Filippov <jcmvbkbc@gmail.com>
+ * config/tc-xtensa.c (trampoline_chain_entry, trampoline_chain)
+ (trampoline_chain_index): New structures.
+ (trampoline_index): Add chain_index field.
+ (xg_order_trampoline_chain_entry, xg_sort_trampoline_chain)
+ (xg_find_chain_entry, xg_get_best_chain_entry)
+ (xg_order_trampoline_chain, xg_get_trampoline_chain)
+ (xg_find_best_eq_target, xg_add_location_to_chain)
+ (xg_create_trampoline_chain, xg_get_single_symbol_slot): New
+ functions.
+ (xg_relax_fixups): Call xg_find_best_eq_target to adjust jump
+ target to point to an existing jump. Call
+ xg_create_trampoline_chain to create new jump target. Call
+ xg_add_location_to_chain to add newly created trampoline jump
+ to the corresponding chain.
+ (add_jump_to_trampoline): Extract loop searching for a single
+ slot with a symbol into a separate function, replace that code
+ with a call to that function.
+ (relax_frag_immed): Call xg_find_best_eq_target to adjust jump
+ target to point to an existing jump.
+ * testsuite/gas/xtensa/all.exp: Add trampoline-2 test.
+ * testsuite/gas/xtensa/trampoline.d: Adjust absolute addresses
+ as many duplicate trampoline chains are now coalesced.
+ * testsuite/gas/xtensa/trampoline.s: Add _nop so that objdump
+ stays in sync with instruction stream.
+ * testsuite/gas/xtensa/trampoline-2.l: New test result file.
+ * testsuite/gas/xtensa/trampoline-2.s: New test source file.
+
+2017-11-27 Max Filippov <jcmvbkbc@gmail.com>
+
* config/tc-xtensa.c (search_trampolines, get_best_trampoline):
Remove definitions.
(xg_find_best_trampoline_for_tinsn): New function.
diff --git a/gas/config/tc-xtensa.c b/gas/config/tc-xtensa.c
index b507d44..9defd73 100644
--- a/gas/config/tc-xtensa.c
+++ b/gas/config/tc-xtensa.c
@@ -7348,6 +7348,33 @@ xtensa_end (void)
xtensa_check_frag_count ();
}
+struct trampoline_chain_entry
+{
+ symbolS *sym;
+ addressT offset;
+};
+
+/* Trampoline chain for a given (sym, offset) pair is a sorted array
+ of locations of trampoline jumps leading there. Jumps are represented
+ as pairs (sym, offset): trampoline frag symbol and offset of the jump
+ inside the frag. */
+struct trampoline_chain
+{
+ struct trampoline_chain_entry target;
+ struct trampoline_chain_entry *entry;
+ size_t n_entries;
+ size_t n_max;
+ bfd_boolean needs_sorting;
+};
+
+struct trampoline_chain_index
+{
+ struct trampoline_chain *entry;
+ size_t n_entries;
+ size_t n_max;
+ bfd_boolean needs_sorting;
+};
+
struct trampoline_index
{
fragS **entry;
@@ -7359,7 +7386,10 @@ struct trampoline_seg
{
struct trampoline_seg *next;
asection *seg;
+ /* Trampolines ordered by their frag fr_address */
struct trampoline_index index;
+ /* Known trampoline chains ordered by (sym, offset) pair */
+ struct trampoline_chain_index chain_index;
};
static struct trampoline_seg trampoline_seg_list;
@@ -7520,6 +7550,187 @@ static bfd_boolean xg_is_trampoline_frag_full (const fragS *fragP)
return fragP->fr_var < 3;
}
+static int xg_order_trampoline_chain_entry (const void *a, const void *b)
+{
+ const struct trampoline_chain_entry *pa = a;
+ const struct trampoline_chain_entry *pb = b;
+
+ if (pa->sym == pb->sym ||
+ S_GET_VALUE (pa->sym) == S_GET_VALUE (pb->sym))
+ if (pa->offset == pb->offset)
+ return 0;
+ else
+ return pa->offset < pb->offset ? -1 : 1;
+ else
+ return S_GET_VALUE (pa->sym) < S_GET_VALUE (pb->sym) ? -1 : 1;
+}
+
+static void xg_sort_trampoline_chain (struct trampoline_chain *tc)
+{
+ qsort (tc->entry, tc->n_entries, sizeof (*tc->entry),
+ xg_order_trampoline_chain_entry);
+ tc->needs_sorting = FALSE;
+}
+
+/* Find entry index in the given chain with maximal address <= source. */
+static size_t xg_find_chain_entry (struct trampoline_chain *tc,
+ addressT source)
+{
+ size_t a = 0;
+ size_t b = tc->n_entries;
+
+ if (tc->needs_sorting)
+ xg_sort_trampoline_chain (tc);
+
+ while (b - a > 1)
+ {
+ size_t c = (a + b) / 2;
+ struct trampoline_chain_entry *e = tc->entry + c;
+
+ if (S_GET_VALUE(e->sym) + e->offset <= source)
+ a = c;
+ else
+ b = c;
+ }
+ return a;
+}
+
+/* Find the best jump target for the source in the given trampoline chain.
+ The best jump target is the one that results in the shortest path to the
+ final target, it's the location of the jump closest to the final target,
+ but within the J_RANGE - J_MARGIN from the source. */
+static struct trampoline_chain_entry *
+xg_get_best_chain_entry (struct trampoline_chain *tc, addressT source)
+{
+ addressT target = S_GET_VALUE(tc->target.sym) + tc->target.offset;
+ size_t i = xg_find_chain_entry (tc, source);
+ struct trampoline_chain_entry *e = tc->entry + i;
+ int step = target < source ? -1 : 1;
+ addressT chained_target;
+ offsetT off;
+
+ if (target > source &&
+ S_GET_VALUE(e->sym) + e->offset <= source &&
+ i + 1 < tc->n_entries)
+ ++i;
+
+ while (i + step < tc->n_entries)
+ {
+ struct trampoline_chain_entry *next = tc->entry + i + step;
+
+ chained_target = S_GET_VALUE(next->sym) + next->offset;
+ off = source - chained_target;
+
+ if (labs (off) >= J_RANGE - J_MARGIN)
+ break;
+
+ i += step;
+ }
+
+ e = tc->entry + i;
+ chained_target = S_GET_VALUE(e->sym) + e->offset;
+ off = source - chained_target;
+
+ if (labs (off) < J_MARGIN ||
+ labs (off) >= J_RANGE - J_MARGIN)
+ return &tc->target;
+ return tc->entry + i;
+}
+
+static int xg_order_trampoline_chain (const void *a, const void *b)
+{
+ const struct trampoline_chain *pa = a;
+ const struct trampoline_chain *pb = b;
+
+ return xg_order_trampoline_chain_entry (&pa->target, &pb->target);
+}
+
+static struct trampoline_chain *
+xg_get_trampoline_chain (struct trampoline_seg *ts,
+ symbolS *sym,
+ addressT offset)
+{
+ struct trampoline_chain_index *idx = &ts->chain_index;
+ struct trampoline_chain c;
+
+ if (idx->needs_sorting)
+ {
+ qsort (idx->entry, idx->n_entries, sizeof (*idx->entry),
+ xg_order_trampoline_chain);
+ idx->needs_sorting = FALSE;
+ }
+ c.target.sym = sym;
+ c.target.offset = offset;
+ return bsearch (&c, idx->entry, idx->n_entries,
+ sizeof (struct trampoline_chain),
+ xg_order_trampoline_chain);
+}
+
+/* Find trampoline chain in the given trampoline segment that is going
+ to the *sym + *offset. If found, replace *sym and *offset with the
+ best jump target in that chain. */
+static struct trampoline_chain *
+xg_find_best_eq_target (struct trampoline_seg *ts,
+ addressT source, symbolS **sym,
+ addressT *offset)
+{
+ struct trampoline_chain *tc = xg_get_trampoline_chain (ts, *sym, *offset);
+
+ if (tc)
+ {
+ struct trampoline_chain_entry *e = xg_get_best_chain_entry (tc, source);
+
+ *sym = e->sym;
+ *offset = e->offset;
+ }
+ return tc;
+}
+
+static void xg_add_location_to_chain (struct trampoline_chain *tc,
+ symbolS *sym, addressT offset)
+{
+ struct trampoline_chain_entry *e;
+
+ if (tc->n_entries == tc->n_max)
+ {
+ tc->n_max = (tc->n_max + 1) * 2;
+ tc->entry = xrealloc (tc->entry, sizeof (*tc->entry) * tc->n_max);
+ }
+ e = tc->entry + tc->n_entries;
+ e->sym = sym;
+ e->offset = offset;
+ ++tc->n_entries;
+ tc->needs_sorting = TRUE;
+}
+
+static struct trampoline_chain *
+xg_create_trampoline_chain (struct trampoline_seg *ts,
+ symbolS *sym, addressT offset)
+{
+ struct trampoline_chain_index *idx = &ts->chain_index;
+ struct trampoline_chain *tc;
+
+ if (idx->n_entries == idx->n_max)
+ {
+ idx->n_max = (idx->n_max + 1) * 2;
+ idx->entry = xrealloc (idx->entry,
+ sizeof (*idx->entry) * idx->n_max);
+ }
+
+ tc = idx->entry + idx->n_entries;
+ tc->target.sym = sym;
+ tc->target.offset = offset;
+ tc->entry = NULL;
+ tc->n_entries = 0;
+ tc->n_max = 0;
+ xg_add_location_to_chain (tc, sym, offset);
+
+ ++idx->n_entries;
+ idx->needs_sorting = TRUE;
+
+ return tc;
+}
+
void dump_trampolines (void);
void
@@ -9200,9 +9411,24 @@ static void xg_relax_fixups (struct trampoline_seg *ts)
for (fx = seginfo->fix_root; fx; fx = fx->fx_next)
{
fixS *fixP = fx;
+ struct trampoline_chain *tc = NULL;
+
+ if (xg_is_relaxable_fixup (fixP))
+ {
+ tc = xg_find_best_eq_target (ts, fixP->fx_frag->fr_address,
+ &fixP->fx_addsy, &fixP->fx_offset);
+ if (!tc)
+ tc = xg_create_trampoline_chain (ts, fixP->fx_addsy,
+ fixP->fx_offset);
+ gas_assert (tc);
+ }
while (xg_is_relaxable_fixup (fixP))
- fixP = xg_relax_fixup (idx, fixP);
+ {
+ fixP = xg_relax_fixup (idx, fixP);
+ xg_add_location_to_chain (tc, fixP->fx_frag->fr_symbol,
+ fixP->fx_where);
+ }
}
}
@@ -9889,14 +10115,14 @@ init_trampoline_frag (fragS *fp)
return growth;
}
-
static int
-add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
+xg_get_single_symbol_slot (fragS *fragP)
{
- int i, slot = -1;
+ int i;
+ int slot = -1;
for (i = 0; i < MAX_SLOTS; ++i)
- if (origfrag->tc_frag_data.slot_symbols[i])
+ if (fragP->tc_frag_data.slot_symbols[i])
{
gas_assert (slot == -1);
slot = i;
@@ -9904,10 +10130,19 @@ add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
gas_assert (slot >= 0 && slot < MAX_SLOTS);
+ return slot;
+}
+
+static fixS *
+add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
+{
+ int slot = xg_get_single_symbol_slot (origfrag);
+ fixS *fixP;
+
/* Assemble a jump to the target label in the trampoline frag. */
- xg_append_jump (tramp,
- origfrag->tc_frag_data.slot_symbols[slot],
- origfrag->tc_frag_data.slot_offsets[slot]);
+ fixP = xg_append_jump (tramp,
+ origfrag->tc_frag_data.slot_symbols[slot],
+ origfrag->tc_frag_data.slot_offsets[slot]);
/* Modify the original j to point here. */
origfrag->tc_frag_data.slot_symbols[slot] = tramp->fr_symbol;
@@ -9923,7 +10158,7 @@ add_jump_to_trampoline (fragS *tramp, fragS *origfrag)
xg_remove_trampoline_from_index (&ts->index, tr);
}
- return 3;
+ return fixP;
}
@@ -10072,15 +10307,42 @@ relax_frag_immed (segT segP,
istack.insn[istack.ninsn - 2].opcode == xtensa_j_opcode)
{
TInsn *jinsn = &istack.insn[istack.ninsn - 2];
+ struct trampoline_seg *ts = find_trampoline_seg (segP);
+ struct trampoline_chain *tc = NULL;
- if (!xg_symbolic_immeds_fit (jinsn, segP, fragP, fragP->fr_offset, total_text_diff))
+ if (ts &&
+ !xg_symbolic_immeds_fit (jinsn, segP, fragP, fragP->fr_offset,
+ total_text_diff))
+ {
+ int s = xg_get_single_symbol_slot (fragP);
+ addressT offset = fragP->tc_frag_data.slot_offsets[s];
+
+ tc = xg_find_best_eq_target (ts, fragP->fr_address,
+ &fragP->tc_frag_data.slot_symbols[s],
+ &offset);
+
+ if (!tc)
+ tc = xg_create_trampoline_chain (ts,
+ fragP->tc_frag_data.slot_symbols[s],
+ offset);
+ fragP->tc_frag_data.slot_offsets[s] = offset;
+ tinsn_immed_from_frag (jinsn, fragP, s);
+ }
+
+ if (!xg_symbolic_immeds_fit (jinsn, segP, fragP, fragP->fr_offset,
+ total_text_diff))
{
fragS *tf = xg_find_best_trampoline_for_tinsn (jinsn, fragP);
if (tf)
{
- this_text_diff += init_trampoline_frag (tf);
- this_text_diff += add_jump_to_trampoline (tf, fragP);
+ fixS *fixP;
+
+ this_text_diff += init_trampoline_frag (tf) + 3;
+ fixP = add_jump_to_trampoline (tf, fragP);
+ xg_add_location_to_chain (tc, fixP->fx_frag->fr_symbol,
+ fixP->fx_where);
+ fragP->tc_frag_data.relax_seen = FALSE;
}
else
{
diff --git a/gas/testsuite/gas/xtensa/all.exp b/gas/testsuite/gas/xtensa/all.exp
index 1ab3827..9fdd2e0 100644
--- a/gas/testsuite/gas/xtensa/all.exp
+++ b/gas/testsuite/gas/xtensa/all.exp
@@ -99,6 +99,7 @@ if [istarget xtensa*-*-*] then {
run_dump_test "weak-call"
run_dump_test "jlong"
run_dump_test "trampoline"
+ run_list_test "trampoline-2"
run_dump_test "first_frag_align"
run_dump_test "auto-litpools"
run_dump_test "auto-litpools-first1"
diff --git a/gas/testsuite/gas/xtensa/trampoline-2.l b/gas/testsuite/gas/xtensa/trampoline-2.l
new file mode 100644
index 0000000..36a0971
--- /dev/null
+++ b/gas/testsuite/gas/xtensa/trampoline-2.l
@@ -0,0 +1 @@
+# No warnings or errors expected!
diff --git a/gas/testsuite/gas/xtensa/trampoline-2.s b/gas/testsuite/gas/xtensa/trampoline-2.s
new file mode 100644
index 0000000..70ec496
--- /dev/null
+++ b/gas/testsuite/gas/xtensa/trampoline-2.s
@@ -0,0 +1,20 @@
+ .text
+
+ .rep 4000
+ bnez a2, .L1
+ .endr
+
+ .rep 100000
+ _nop
+ .endr
+
+.L1:
+ j .L1
+
+ .rep 100000
+ _nop
+ .endr
+
+ .rep 4000
+ bnez a2, .L1
+ .endr
diff --git a/gas/testsuite/gas/xtensa/trampoline.d b/gas/testsuite/gas/xtensa/trampoline.d
index f9aa5d9..287075b 100644
--- a/gas/testsuite/gas/xtensa/trampoline.d
+++ b/gas/testsuite/gas/xtensa/trampoline.d
@@ -5,29 +5,28 @@
.*: +file format .*xtensa.*
#...
.*0:.*j.0x1194c
-.*3:.*j.0x1194f
-.*6:.*j.0x11952
-.*9:.*j.0x11955
+.*3:.*j.0x1194c
+.*6:.*j.0x1194f
+.*9:.*j.0x11952
#...
-.*11949:.*j.0x11958
-.*1194c:.*j.0x24a0b
+.*11949:.*j.0x11955
+.*1194c:.*j.0x24a08
.*1194f:.*j.0x24a0b
-.*11952:.*j.0x24a0e
-.*11955:.*j.0x2d687
+.*11952:.*j.0x2d684
#...
+.*24a08:.*j.0x24a08
.*24a0b:.*j.0x24a0b
-.*24a0e:.*j.0x24a0e
#...
-.*2d684:.*ret
-.*2d687:.*j.0x49404
+.*2d681:.*ret
+.*2d684:.*j.0x49401
#...
-.*49404:.*j.0x49404
-.*49407:.*beqz.n.a2,.0x4940c
-.*49409:.*j.0x61aa2
+.*49401:.*j.0x49401
+.*49404:.*beqz.n.a2,.0x49409
+.*49406:.*j.0x61a9f
#...
-.*61aa2:.*j.0x7a13b
+.*61a9f:.*j.0x7a138
#...
-.*7a13b:.*j.0x927f5
+.*7a138:.*j.0x927f8
#...
-.*927f5:.*j.0x927f5
+.*927f8:.*j.0x927f8
#...
diff --git a/gas/testsuite/gas/xtensa/trampoline.s b/gas/testsuite/gas/xtensa/trampoline.s
index 997a7f9..1511343 100644
--- a/gas/testsuite/gas/xtensa/trampoline.s
+++ b/gas/testsuite/gas/xtensa/trampoline.s
@@ -25,6 +25,7 @@
_ret
.endr
_nop
+ _nop
4:
j 4b