diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2020-07-20 20:34:46 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2020-07-20 20:37:10 +0200 |
commit | 3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c (patch) | |
tree | cc7e1025a52c224d67d2c5e9721fabe1a242d6af /gcc | |
parent | d5803b9876b3d11c93d1a10fabb3fbb1c4a14bd6 (diff) | |
download | gcc-3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c.zip gcc-3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c.tar.gz gcc-3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c.tar.bz2 |
i386: Use lock prefixed insn instead of MFENCE [PR95750]
Currently, __atomic_thread_fence(seq_cst) on x86 and x86-64 generates
an mfence instruction. A dummy atomic instruction (a lock-prefixed instruction
or xchg with a memory operand) would provide the same sequential consistency
guarantees while being more efficient on most current CPUs. The mfence
instruction additionally orders non-temporal stores, but that is not relevant
for atomic operations, because non-temporal stores are not ordered by seq_cst
atomic operations anyway.
2020-07-20 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/95750
* config/i386/i386.h (TARGET_AVOID_MFENCE):
Rename from TARGET_USE_XCHG_FOR_ATOMIC_STORE.
* config/i386/sync.md (mfence_sse2): Disable for TARGET_AVOID_MFENCE.
(mfence_nosse): Enable also for TARGET_AVOID_MFENCE. Emit the
memory reference to the stack in word_mode.
(mem_thread_fence): Do not generate mfence_sse2 pattern when
TARGET_AVOID_MFENCE is true.
(atomic_store<mode>): Update for rename.
* config/i386/x86-tune.def (X86_TUNE_AVOID_MFENCE):
Rename from X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE.
gcc/testsuite/ChangeLog:
PR target/95750
* gcc.target/i386/pr95750.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/sync.md | 21 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr95750.c | 19 |
4 files changed, 36 insertions, 11 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f4a8f13..114967e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -598,8 +598,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] #define TARGET_ONE_IF_CONV_INSN \ ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN] -#define TARGET_USE_XCHG_FOR_ATOMIC_STORE \ - ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE] +#define TARGET_AVOID_MFENCE ix86_tune_features[X86_TUNE_AVOID_MFENCE] #define TARGET_EMIT_VZEROUPPER \ ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] #define TARGET_EXPAND_ABS \ diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index e221090..c682703 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -89,7 +89,8 @@ (define_insn "mfence_sse2" [(set (match_operand:BLK 0) (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] - "TARGET_64BIT || TARGET_SSE2" + "(TARGET_64BIT || TARGET_SSE2) + && !TARGET_AVOID_MFENCE" "mfence" [(set_attr "type" "sse") (set_attr "length_address" "0") @@ -100,8 +101,14 @@ [(set (match_operand:BLK 0) (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE)) (clobber (reg:CC FLAGS_REG))] - "!(TARGET_64BIT || TARGET_SSE2)" - "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" + "!(TARGET_64BIT || TARGET_SSE2) + || TARGET_AVOID_MFENCE" +{ + rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx); + + output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem); + return ""; +} [(set_attr "memory" "unknown")]) (define_expand "mem_thread_fence" @@ -117,7 +124,8 @@ rtx (*mfence_insn)(rtx); rtx mem; - if (TARGET_64BIT || TARGET_SSE2) + if ((TARGET_64BIT || TARGET_SSE2) + && !TARGET_AVOID_MFENCE) mfence_insn = gen_mfence_sse2; else mfence_insn = gen_mfence_nosse; @@ -306,11 +314,10 @@ { operands[1] = force_reg (<MODE>mode, operands[1]); - /* For seq-cst stores, use XCHG when we lack MFENCE - or when target prefers XCHG. */ + /* For seq-cst stores, use XCHG when we lack MFENCE. 
*/ if (is_mm_seq_cst (model) && (!(TARGET_64BIT || TARGET_SSE2) - || TARGET_USE_XCHG_FOR_ATOMIC_STORE)) + || TARGET_AVOID_MFENCE)) { emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode), operands[0], operands[1], diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 1776aba..6eff825 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -313,8 +313,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC) -/* X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE: Use xchg instead of mov+mfence. */ -DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store", +/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */ +DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC) /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by diff --git a/gcc/testsuite/gcc.target/i386/pr95750.c b/gcc/testsuite/gcc.target/i386/pr95750.c new file mode 100644 index 0000000..c47108f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95750.c @@ -0,0 +1,19 @@ +/* PR target/95750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=core2" } */ + +void +foo (void) +{ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +} + +int x; + +void +bar (void) +{ + __atomic_store_n (&x, -1, __ATOMIC_SEQ_CST); +} + +/* { dg-final { scan-assembler-not "mfence" } } */ |