diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2025-08-16 14:04:33 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2025-08-19 18:04:45 -0700 |
commit | 2ecaeee92414e6687ef6ce1f63fd16fcff3c5961 (patch) | |
tree | 08dd824a134fb132398ff829377c975b97d24cb8 /gcc | |
parent | 4931fc20f4f7e6b69e3b926c27179192345cb3c1 (diff) | |
download | gcc-2ecaeee92414e6687ef6ce1f63fd16fcff3c5961.zip gcc-2ecaeee92414e6687ef6ce1f63fd16fcff3c5961.tar.gz gcc-2ecaeee92414e6687ef6ce1f63fd16fcff3c5961.tar.bz2 |
x86: Place the TLS call before all register setting BBs
We can't place a TLS call before a conditional jump in a basic block like
(code_label 13 11 14 4 2 (nil) [1 uses])
(note 14 13 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(jump_insn 16 14 17 4 (set (pc)
(if_then_else (le (reg:CCNO 17 flags)
(const_int 0 [0]))
(label_ref 27)
(pc))) "x.c":10:21 discrim 1 1462 {*jcc}
(expr_list:REG_DEAD (reg:CCNO 17 flags)
(int_list:REG_BR_PROB 628353713 (nil)))
-> 27)
since the TLS call will clobber flags register nor place a TLS call in a
basic block if any live caller-saved registers aren't dead at the end of
the basic block:
;; live in 6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104
;; live gen 0 [ax] 102 106 108 116 117 118 120
;; live kill 5 [di]
Instead, we should place such call before all register setting basic
blocks which dominate the current basic block.
Keep track the replaced GNU and GNU2 TLS instructions. Use these info to
place the __tls_get_addr call and mark FLAGS register as dead.
gcc/
PR target/121572
* config/i386/i386-features.cc (replace_tls_call): Add a bitmap
argument and put the updated TLS instruction in the bitmap.
(ix86_get_dominator_for_reg): New.
(ix86_check_flags_reg): Likewise.
(ix86_emit_tls_call): Likewise.
(ix86_place_single_tls_call): Add 2 bitmap arguments for updated
GNU and GNU2 TLS instructions. Call ix86_emit_tls_call to emit
TLS instruction. Correct debug dump for before instruction.
gcc/testsuite/
PR target/121572
* gcc.target/i386/pr121572-1a.c: New test.
* gcc.target/i386/pr121572-1b.c: Likewise.
* gcc.target/i386/pr121572-2a.c: Likewise.
* gcc.target/i386/pr121572-2b.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-features.cc | 334 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr121572-2a.c | 39 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr121572-2b.c | 6 |
5 files changed, 332 insertions, 106 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index f0bdc5c..7869ee2 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3684,10 +3684,12 @@ ix86_broadcast_inner (rtx op, machine_mode mode, return op; } -/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC. */ +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and + put the updated instruction in UPDATED_TLS_INSNS. */ static void -replace_tls_call (rtx src, auto_bitmap &tls_call_insns) +replace_tls_call (rtx src, auto_bitmap &tls_call_insns, + auto_bitmap &updated_tls_insns) { bitmap_iterator bi; unsigned int id; @@ -3716,6 +3718,9 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns) if (recog_memoized (set_insn) < 0) gcc_unreachable (); + /* Put SET_INSN in UPDATED_TLS_INSNS. */ + bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn)); + if (dump_file) { fprintf (dump_file, "\nReplace:\n\n"); @@ -3732,15 +3737,216 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns) } } +/* Return the basic block which dominates all basic blocks which set + hard register REGNO used in basic block BB. */ + +static basic_block +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb) +{ + basic_block set_bb; + auto_bitmap set_bbs; + + /* Get all BBs which set REGNO and dominate the current BB from all + DEFs of REGNO. */ + for (df_ref def = DF_REG_DEF_CHAIN (regno); + def; + def = DF_REF_NEXT_REG (def)) + if (!DF_REF_IS_ARTIFICIAL (def) + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER) + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER)) + { + set_bb = DF_REF_BB (def); + if (dominated_by_p (CDI_DOMINATORS, bb, set_bb)) + bitmap_set_bit (set_bbs, set_bb->index); + } + + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); + return bb; +} + +/* Mark FLAGS register as live in DATA, a bitmap of live caller-saved + registers, if DEST is FLAGS register. */ + +static void +ix86_check_flags_reg (rtx dest, const_rtx, void *data) +{ + auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data; + if (REG_P (dest) && REGNO (dest) == FLAGS_REG) + bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG); +} + +/* Emit a TLS_SET instruction of KIND in basic block BB. Store the + insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P + for emit_insn_after. UPDATED_GNU_TLS_INSNS contains instructions + which replace the GNU TLS instructions. UPDATED_GNU2_TLS_INSNS + contains instructions which replace the GNU2 TLS instructions. */ + +static rtx_insn * +ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb, + rtx_insn **before_p, rtx_insn **after_p, + auto_bitmap &updated_gnu_tls_insns, + auto_bitmap &updated_gnu2_tls_insns) +{ + rtx_insn *tls_insn; + + do + { + rtx_insn *insn = BB_HEAD (bb); + while (insn && !NONDEBUG_INSN_P (insn)) + { + if (insn == BB_END (bb)) + break; + insn = NEXT_INSN (insn); + } + + /* TLS_GD and TLS_LD_BASE instructions are normal functions which + clobber caller-saved registers. TLSDESC instructions only + clobber FLAGS. If any registers clobbered by TLS instructions + are live in this basic block, we must insert TLS instructions + after all live registers clobbered are dead. */ + + auto_bitmap live_caller_saved_regs; + bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); + + if (bitmap_bit_p (in, FLAGS_REG)) + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); + + unsigned int i; + + /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE + instructions. */ + if (kind != X86_CSE_TLSDESC) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (call_used_regs[i] + && !fixed_regs[i] + && bitmap_bit_p (in, i)) + bitmap_set_bit (live_caller_saved_regs, i); + + if (bitmap_empty_p (live_caller_saved_regs)) + { + if (insn == BB_HEAD (bb) || insn == BB_END (bb)) + { + *before_p = insn; + tls_insn = emit_insn_before (tls_set, insn); + } + else + { + insn = PREV_INSN (insn); + *after_p = insn; + tls_insn = emit_insn_after (tls_set, insn); + } + return tls_insn; + } + + bool repeat = false; + + /* Search for REG_DEAD notes in this basic block. */ + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* NB: Conditional jump is the only instruction which reads + flags register and changes control flow. We can never + place the TLS call after unconditional jump. */ + if (JUMP_P (insn)) + { + /* This must be a conditional jump. */ + rtx label = JUMP_LABEL (insn); + if (label == nullptr + || ANY_RETURN_P (label) + || !(LABEL_P (label) || SYMBOL_REF_P (label))) + gcc_unreachable (); + + /* Place the call before all FLAGS_REG setting BBs since + we can't place a call before nor after a conditional + jump. */ + bb = ix86_get_dominator_for_reg (FLAGS_REG, bb); + + /* Start over again. */ + repeat = true; + break; + } + + if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn))) + { + /* Insert the __tls_get_addr call before INSN which + replaces a __tls_get_addr call. */ + *before_p = insn; + tls_insn = emit_insn_before (tls_set, insn); + return tls_insn; + } + + if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn))) + { + /* Mark FLAGS register as dead since FLAGS register + would be clobbered by the GNU2 TLS instruction. */ + bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG); + continue; + } + + /* Check if FLAGS register is live. */ + note_stores (insn, ix86_check_flags_reg, + &live_caller_saved_regs); + + rtx link; + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD + && REG_P (XEXP (link, 0))) + { + /* Mark the live caller-saved register as dead. */ + for (i = REGNO (XEXP (link, 0)); + i < END_REGNO (XEXP (link, 0)); + i++) + if (i < FIRST_PSEUDO_REGISTER) + bitmap_clear_bit (live_caller_saved_regs, i); + + if (bitmap_empty_p (live_caller_saved_regs)) + { + *after_p = insn; + tls_insn = emit_insn_after (tls_set, insn); + return tls_insn; + } + } + } + + /* NB: Start over again for conditional jump. */ + if (repeat) + continue; + + gcc_assert (!bitmap_empty_p (live_caller_saved_regs)); + + /* If any live caller-saved registers aren't dead at the end of + this basic block, get the basic block which dominates all + basic blocks which set the remaining live registers. */ + auto_bitmap set_bbs; + bitmap_iterator bi; + unsigned int id; + EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) + { + basic_block set_bb = ix86_get_dominator_for_reg (id, bb); + bitmap_set_bit (set_bbs, set_bb->index); + } + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); + } + while (true); +} + /* Generate a TLS call of KIND with VAL and copy the call result to DEST, at entry of the nearest dominator for basic block map BBS, which is in the fake loop that contains the whole function, so that there is only - a single TLS CALL of KIND with VAL in the whole function. If - TLSDESC_SET isn't nullptr, insert it before the TLS call. */ + a single TLS CALL of KIND with VAL in the whole function. + UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS + instructions. UPDATED_GNU2_TLS_INSNS contains instructions which + replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr, + insert it before the TLS call. */ static void ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, - bitmap bbs, rtx tlsdesc_set = nullptr) + auto_bitmap &bbs, + auto_bitmap &updated_gnu_tls_insns, + auto_bitmap &updated_gnu2_tls_insns, + rtx tlsdesc_set = nullptr) { basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); while (bb->loop_father->latch @@ -3748,17 +3954,6 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, bb = get_immediate_dominator (CDI_DOMINATORS, bb->loop_father->header); - rtx_insn *insn = BB_HEAD (bb); - while (insn && !NONDEBUG_INSN_P (insn)) - { - if (insn == BB_END (bb)) - { - insn = NULL; - break; - } - insn = NEXT_INSN (insn); - } - rtx rax = nullptr, rdi; rtx eqv = nullptr; rtx caddr; @@ -3766,7 +3961,6 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, rtx clob; rtx symbol; rtx tls; - rtx_insn *tls_insn; switch (kind) { @@ -3808,94 +4002,13 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, gcc_unreachable (); } + /* Emit the TLS CALL insn. */ rtx_insn *before = nullptr; rtx_insn *after = nullptr; - if (insn == BB_HEAD (bb)) - before = insn; - else - after = insn ? PREV_INSN (insn) : BB_END (bb); - - /* TLS_GD and TLS_LD_BASE instructions are normal functions which - clobber caller-saved registers. TLSDESC instructions only clobber - FLAGS. If any registers clobbered by TLS instructions are live - in this basic block, we must insert TLS instructions after all live - registers clobbered are dead. */ - - auto_bitmap live_caller_saved_regs; - bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); - - bool flags_live_p = bitmap_bit_p (in, FLAGS_REG); - - unsigned int i; - - /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE - instructions. */ - if (kind != X86_CSE_TLSDESC) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (call_used_regs[i] - && !fixed_regs[i] - && bitmap_bit_p (in, i)) - bitmap_set_bit (live_caller_saved_regs, i); - - if (!bitmap_empty_p (live_caller_saved_regs)) - { - /* Search for REG_DEAD notes in this basic block. */ - FOR_BB_INSNS (bb, insn) - { - if (!NONDEBUG_INSN_P (insn)) - continue; - - /* Check if FLAGS register is live. */ - set = single_set (insn); - if (set) - { - rtx dest = SET_DEST (set); - if (REG_P (dest) && REGNO (dest) == FLAGS_REG) - flags_live_p = true; - } - - rtx link; - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == REG_DEAD - && REG_P (XEXP (link, 0))) - { - /* Mark the live caller-saved register as dead. */ - for (i = REGNO (XEXP (link, 0)); - i < END_REGNO (XEXP (link, 0)); - i++) - bitmap_clear_bit (live_caller_saved_regs, i); - - /* Check if FLAGS register is dead. */ - if (REGNO (XEXP (link, 0)) == FLAGS_REG) - flags_live_p = false; - - if (bitmap_empty_p (live_caller_saved_regs)) - { - /* All live caller-saved registers are dead after - this instruction. Since TLS instructions - clobber FLAGS register, it must be dead where - the TLS will be inserted after. */ - if (flags_live_p) - gcc_unreachable (); - after = insn; - goto insert_after; - } - } - } - - /* All live caller-saved registers should be dead at the end - of this basic block. */ - gcc_unreachable (); - } - - /* Emit the TLS CALL insn. */ - if (after) - { -insert_after: - tls_insn = emit_insn_after (tls, after); - } - else - tls_insn = emit_insn_before (tls, before); + rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before, + &after, + updated_gnu_tls_insns, + updated_gnu2_tls_insns); rtx_insn *tlsdesc_insn = nullptr; if (tlsdesc_set) @@ -3936,7 +4049,7 @@ insert_after: print_rtl_single (dump_file, tlsdesc_insn); print_rtl_single (dump_file, tls_insn); fprintf (dump_file, "\nbefore:\n\n"); - print_rtl_single (dump_file, insn); + print_rtl_single (dump_file, before); fprintf (dump_file, "\n"); } } @@ -4213,6 +4326,8 @@ pass_x86_cse::x86_cse (void) basic_block bb; rtx_insn *insn; unsigned int i; + auto_bitmap updated_gnu_tls_insns; + auto_bitmap updated_gnu2_tls_insns; df_set_flags (DF_DEFER_INSN_RESCAN); @@ -4333,7 +4448,10 @@ pass_x86_cse::x86_cse (void) case X86_CSE_TLS_LD_BASE: case X86_CSE_TLSDESC: broadcast_reg = gen_reg_rtx (load->mode); - replace_tls_call (broadcast_reg, load->insns); + replace_tls_call (broadcast_reg, load->insns, + (load->kind == X86_CSE_TLSDESC + ? updated_gnu2_tls_insns + : updated_gnu_tls_insns)); load->broadcast_reg = broadcast_reg; break; @@ -4399,6 +4517,8 @@ pass_x86_cse::x86_cse (void) load->val, load->kind, load->bbs, + updated_gnu_tls_insns, + updated_gnu2_tls_insns, PATTERN (load->def_insn)); break; case X86_CSE_VEC_DUP: @@ -4442,7 +4562,9 @@ pass_x86_cse::x86_cse (void) ix86_place_single_tls_call (load->broadcast_reg, load->val, load->kind, - load->bbs); + load->bbs, + updated_gnu_tls_insns, + updated_gnu2_tls_insns); break; case X86_CSE_CONST0_VECTOR: case X86_CSE_CONSTM1_VECTOR: diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c b/gcc/testsuite/gcc.target/i386/pr121572-1a.c new file mode 100644 index 0000000..270d8ff --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c @@ -0,0 +1,41 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**bug: +**.LFB[0-9]+: +**... +** leaq tv_cache@tlsld\(%rip\), %rdi +** call __tls_get_addr@PLT +** movl \$-1, %edi +** mov[l|q] %[e|r]ax, %[e|r]bx +** call val@PLT +**... +*/ + +extern __thread int tv_cache __attribute__ ((visibility ("hidden"))); +extern void use_cache (int); +extern int val (int v); + +__attribute__ ((optimize (2))) +void +bug (void) +{ + int compared = val (-1); + + if (compared == 0 || (compared > 0 && val (2) == 0)) + { + __builtin_trap (); + } + + if (compared < 0) + { + use_cache (tv_cache); + return; + } + + use_cache (tv_cache); + __builtin_trap (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c b/gcc/testsuite/gcc.target/i386/pr121572-1b.c new file mode 100644 index 0000000..8a60891 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-1b.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu2" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */ + +/* +**bug: +**.LFB[0-9]+: +**... +** lea[l|q] tv_cache@TLSDESC\(%rip\), %[e|r]ax +** movl \$-1, %edi +** call \*tv_cache@TLSCALL\(%[e|r]ax\) +** mov[l|q] %[e|r]ax, %[e|r]bx +** call val@PLT +**... +*/ + +#include "pr121572-1a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c b/gcc/testsuite/gcc.target/i386/pr121572-2a.c new file mode 100644 index 0000000..38b2546 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c @@ -0,0 +1,39 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */ + +typedef enum +{ + MPFR_RNDN +} mpfr_rnd_t; +typedef int mpfr_t[1]; +long __gmpfr_emin, mpfr_agm_expo_0; +_Thread_local long __gmpfr_emax; +int mpfr_agm_compare, mpfr_agm___trans_tmp_1; +mpfr_t mpfr_agm_u; +void mpfr_mul (int *, int, int, mpfr_rnd_t); +int +mpfr_agm (int op1) +{ + int op2 = 0; + if (__builtin_expect (mpfr_agm_compare == 0, 0)) + return 0; + if (mpfr_agm_compare > 0) + { + int t = op1; + op2 = t; + } + mpfr_agm_expo_0 = __gmpfr_emax; + for (;;) + { + retry: + mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN); + if (0) + goto retry; + if (__builtin_expect (mpfr_agm___trans_tmp_1, 1)) + break; + } + __gmpfr_emin = __gmpfr_emax; + return 0; +} + +/* { dg-final { scan-assembler-times "call\[ \t\]__tls_get_addr@PLT" 1 { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c b/gcc/testsuite/gcc.target/i386/pr121572-2b.c new file mode 100644 index 0000000..33d7002 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */ + +#include "pr121572-2a.c" + +/* { dg-final { scan-assembler-times "call\[ \t\]\\*__gmpfr_emax@TLSCALL\\(%(?:r|e)ax\\)" 1 { target { ! ia32 } } } } */ |