aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2020-07-20 20:34:46 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2020-07-20 20:37:10 +0200
commit: 3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c (patch)
tree: cc7e1025a52c224d67d2c5e9721fabe1a242d6af /gcc
parent: d5803b9876b3d11c93d1a10fabb3fbb1c4a14bd6 (diff)
download: gcc-3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c.zip
gcc-3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c.tar.gz
gcc-3c5e83d5b32c31b11cf1684bf5d1ab3e7174685c.tar.bz2
i386: Use lock prefixed insn instead of MFENCE [PR95750]
Currently, __atomic_thread_fence(seq_cst) on x86 and x86-64 generates an mfence instruction. A dummy atomic instruction (a lock-prefixed instruction or xchg with a memory operand) would provide the same sequential consistency guarantees while being more efficient on most current CPUs. The mfence instruction additionally orders non-temporal stores, but this is not relevant for atomic operations, since non-temporal stores are not ordered by seq_cst atomic operations anyway. 2020-07-20 Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog: PR target/95750 * config/i386/i386.h (TARGET_AVOID_MFENCE): Rename from TARGET_USE_XCHG_FOR_ATOMIC_STORE. * config/i386/sync.md (mfence_sse2): Disable for TARGET_AVOID_MFENCE. (mfence_nosse): Enable also for TARGET_AVOID_MFENCE. Emit stack referred memory in word_mode. (mem_thread_fence): Do not generate mfence_sse2 pattern when TARGET_AVOID_MFENCE is true. (atomic_store<mode>): Update for rename. * config/i386/x86-tune.def (X86_TUNE_AVOID_MFENCE): Rename from X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE. gcc/testsuite/ChangeLog: PR target/95750 * gcc.target/i386/pr95750.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.h 3
-rw-r--r-- gcc/config/i386/sync.md 21
-rw-r--r-- gcc/config/i386/x86-tune.def 4
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr95750.c 19
4 files changed, 36 insertions, 11 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f4a8f13..114967e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -598,8 +598,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
#define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
-#define TARGET_USE_XCHG_FOR_ATOMIC_STORE \
- ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
+#define TARGET_AVOID_MFENCE ix86_tune_features[X86_TUNE_AVOID_MFENCE]
#define TARGET_EMIT_VZEROUPPER \
ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
#define TARGET_EXPAND_ABS \
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index e221090..c682703 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -89,7 +89,8 @@
(define_insn "mfence_sse2"
[(set (match_operand:BLK 0)
(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
- "TARGET_64BIT || TARGET_SSE2"
+ "(TARGET_64BIT || TARGET_SSE2)
+ && !TARGET_AVOID_MFENCE"
"mfence"
[(set_attr "type" "sse")
(set_attr "length_address" "0")
@@ -100,8 +101,14 @@
[(set (match_operand:BLK 0)
(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
(clobber (reg:CC FLAGS_REG))]
- "!(TARGET_64BIT || TARGET_SSE2)"
- "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
+ "!(TARGET_64BIT || TARGET_SSE2)
+ || TARGET_AVOID_MFENCE"
+{
+ rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx);
+
+ output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem);
+ return "";
+}
[(set_attr "memory" "unknown")])
(define_expand "mem_thread_fence"
@@ -117,7 +124,8 @@
rtx (*mfence_insn)(rtx);
rtx mem;
- if (TARGET_64BIT || TARGET_SSE2)
+ if ((TARGET_64BIT || TARGET_SSE2)
+ && !TARGET_AVOID_MFENCE)
mfence_insn = gen_mfence_sse2;
else
mfence_insn = gen_mfence_nosse;
@@ -306,11 +314,10 @@
{
operands[1] = force_reg (<MODE>mode, operands[1]);
- /* For seq-cst stores, use XCHG when we lack MFENCE
- or when target prefers XCHG. */
+ /* For seq-cst stores, use XCHG when we lack MFENCE. */
if (is_mm_seq_cst (model)
&& (!(TARGET_64BIT || TARGET_SSE2)
- || TARGET_USE_XCHG_FOR_ATOMIC_STORE))
+ || TARGET_AVOID_MFENCE))
{
emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
operands[0], operands[1],
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 1776aba..6eff825 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -313,8 +313,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
| m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
-/* X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE: Use xchg instead of mov+mfence. */
-DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
+/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */
+DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
diff --git a/gcc/testsuite/gcc.target/i386/pr95750.c b/gcc/testsuite/gcc.target/i386/pr95750.c
new file mode 100644
index 0000000..c47108f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95750.c
@@ -0,0 +1,19 @@
+/* PR target/95750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=core2" } */
+
+void
+foo (void)
+{
+ __atomic_thread_fence (__ATOMIC_SEQ_CST);
+}
+
+int x;
+
+void
+bar (void)
+{
+ __atomic_store_n (&x, -1, __ATOMIC_SEQ_CST);
+}
+
+/* { dg-final { scan-assembler-not "mfence" } } */