author | Andrew Macleod <amacleod@gcc.gnu.org> | 2011-11-06 14:55:48 +0000
committer | Andrew Macleod <amacleod@gcc.gnu.org> | 2011-11-06 14:55:48 +0000
commit | 86951993f8a4cae2fb26bf8705e2f248a8d6f21e
tree | c0f499483e35c60c1b9f065f10a630e6fa4345bc /gcc
parent | a8a058f6523f1e0f7b69ec1837848e55cf9f0856
Check in patch/merge from cxx-mem-model Branch
From-SVN: r181031
Diffstat (limited to 'gcc')
105 files changed, 9335 insertions, 1323 deletions
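This merge introduces the `__atomic` builtin family, which takes an explicit memory-model argument (the `__ATOMIC_RELAXED` through `__ATOMIC_SEQ_CST` macros are defined by the cppbuiltin.c change below), and reroutes the older `__sync` builtins through the same atomic expanders. Below is a minimal usage sketch, assuming a compiler that contains this merge; the surrounding demo program is illustrative only and is not part of the patch.

```c
/* Minimal usage sketch of the __atomic builtins added by this merge.
   The builtins and __ATOMIC_* macros are the documented interface; the
   program around them is illustrative only.  Build e.g. with: gcc demo.c */

#include <stdio.h>

static int counter;          /* shared counter */
static int ready;            /* release/acquire flag */

int main (void)
{
  /* Relaxed read-modify-write: returns the value *before* the addition.  */
  int old = __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);

  /* Store with release semantics; a matching acquire load on another
     thread would then also observe the counter update above.  */
  __atomic_store_n (&ready, 1, __ATOMIC_RELEASE);
  int seen = __atomic_load_n (&ready, __ATOMIC_ACQUIRE);

  /* Exchange returns the previous contents of the object.  */
  int prev = __atomic_exchange_n (&ready, 0, __ATOMIC_ACQ_REL);

  /* Fences for inter-thread and signal-handler ordering.  */
  __atomic_thread_fence (__ATOMIC_SEQ_CST);
  __atomic_signal_fence (__ATOMIC_SEQ_CST);

  printf ("old=%d seen=%d prev=%d counter=%d\n", old, seen, prev, counter);
  return 0;
}
```

The `_n` forms are the size-generic variants that the front end resolves to the `_1`/`_2`/`_4`/`_8`/`_16` builtins, which is the resolution path added to c-common.c further down in this diff.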
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a96d516..1c2f1bc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,111 @@ +2011-11-06 Andrew Macleod <amacleod@redhat.com> + Richard Henderson <rth@redhat.com> + Aldy Hernandez <aldyh@redhat.com> + + Merged from cxx-mem-model. + + * cppbuiltin.c (define__GNUC__): Define __ATOMIC memory models + * coretypes.h (enum memmodel): New. enumerated memory model type. + * Makefile.in (cppbuiltin.o) Add missing dependency on $(TREE_H) + * genopinit,c (optabs): Add atomic direct optab handlers. + * sync-builtins.def (BUILT_IN_ATOMIC_*): New builtins. + * builtin-types.def (BT_CONST_VOLATILE_PTR, + BT_FN_I{1,2,4,8,16}_CONST_VPTR_INT, BT_FN_VOID_VPTR_INT, + BT_FN_BOOL_VPTR_INT, BT_FN_BOOL_SIZE_CONST_VPTR, + BT_FN_I{1,2,4,8,16}_VPTR_I{1,2,4,8,16}_INT, + BT_FN_VOID_VPTR_I{1,2,4,8,16}_INT, BT_FN_VOID_SIZE_VPTR_PTR_INT, + BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, + BT_FN_BOOL_VPTR_PTR_I{1,2,4,8,16}_BOOL_INT_INT): New builtin types. + * expr.h (expand_atomic_*): Add prototypes. + (expand_{bool,val}_compare_and_swap): Remove prototypes. + * c-typeck.c (build_function_call_vec): Don't reprocess __atomic + parameters. + * common.opt (Winvalid-memory-model): New warning flag. + (finline-atomics): New. Flag to disable atomic inlining. + * params.h (ALLOW_LOAD_DATA_RACES): New. + (ALLOW_PACKED_LOAD_DATA_RACES): New. + (ALLOW_PACKED_STORE_DATA_RACES): New. + * params.def (PARAM_ALLOW_LOAD_DATA_RACES): New. + (PARAM_ALLOW_PACKED_LOAD_DATA_RACES): New. + (PARAM_ALLOW_PACKED_STORE_DATA_RACES): New. + * builtins.c (is_builtin_name): Handle __atomic. + (get_memmodel): New. Extract memory model. + (expand_expr_force_mode): New. Factor out common code for ensuring an + integer argument is in the proper mode. + (expand_builtin_sync_operation): Remove ignore param. Always call + expand_atomic_fetch_op instead of the old expanders. + (expand_builtin_compare_and_swap, + expand_builtin_sync_lock_test_and_set): Use expand_expr_force_mode, + call atomic expanders instead of sync expanders. + (expand_builtin_sync_lock_release): Call atomic_store expander. + (expand_builtin_atomic_compare_exchange, expand_builtin_atomic_load, + expand_builtin_atomic_store, expand_builtin_atomic_fetch_op): New. + (expand_builtin_atomic_exchange): New. + (fold_builtin_atomic_always_lock_free, + expand_builtin_atomic_always_lock_free, + fold_builtin_atomic_is_lock_free, expand_builtin_atomic_is_lock_free): + New. + (expand_builtin_mem_thread_fence, expand_builtin_atomic_thread_fence, + expand_builtin_atomic_signal_fence): New. + (expand_builtin_mem_signal_fence): New. + (expand_builtin): Add cases for BUILT_IN_ATOMIC_*. + (fold_builtin_2): Add cases for BUILT_IN_ATOMIC_{IS,ALWAYS}_LOCK_FREE. + * optabs.h (DOI_atomic_*): Define new atomics. + (atomic_*_optab): Define. + (can_compare_and_swap_p, expand_atomic_compare_and_swap): New + prototypes. + * optabs.c (expand_sync_operation, expand_sync_fetch_operation): Remove. + (expand_sync_lock_test_and_set): Remove. + (expand_atomic_load, expand_atomic_store): New. + (expand_atomic_exchange): New. + (expand_atomic_compare_and_swap): New. Implements + atomic_compare_exchange via compare and swap. + (struct atomic_op_functions): Opcode table struct for fetch ops. + (get_atomic_op_for_code): New. Return an opcode table entry. + (maybe_emit_op): New. Try to emit a fetch op. + (expand_atomic_fetch_op): New. + (expand_val_compare_and_swap_1): Remove. + (expand_val_compare_and_swap, expand_bool_compare_and_swap): Remove. 
+ (expand_atomic_compare_and_swap): Rename from + expand_atomic_compare_exchange. Rewrite to return both success and + oldval return values; expand via both atomic and sync optabs. + (can_compare_and_swap_p): New. + (expand_compare_and_swap_loop): Use expand_atomic_compare_and_swap. + (maybe_gen_insn): Handle 7 and 8 operands. + * omp-low.c (expand_omp_atomic_fetch_op): Don't test individual + fetch_op optabs, only test can_compare_and_swap_p. Use __atomic + builtins instead of __sync builtins. + (expand_omp_atomic_pipeline): Use can_compare_and_swap_p. + * doc/extend.texi: Document __atomic built-in functions. + * doc/invoke.texi: Document data race parameters. + * doc/md.texi: Document atomic patterns. + * config/i386/i386.md (UNSPEC_MOVA): New. + (UNSPECV_CMPXCHG): Split into ... + (UNSPECV_CMPXCHG_1, UNSPECV_CMPXCHG_2, + UNSPECV_CMPXCHG_3, UNSPECV_CMPXCHG_4): New. + * config/i386/sync.md (ATOMIC): New mode iterator. + (atomic_load<ATOMIC>, atomic_store<ATOMIC>): New. + (atomic_loaddi_fpu, atomic_storedi_fpu, movdi_via_fpu): New. + (mem_thread_fence): Rename from memory_barrier. + Handle the added memory model parameter. + (mfence_nosse): Rename from memory_barrier_nosse. + (sync_compare_and_swap<CASMODE>): Split into ... + (atomic_compare_and_swap<SWI124>): this and ... + (atomic_compare_and_swap<CASMODE>): this. Handle the new parameters. + (atomic_compare_and_swap_single<SWI>): Rename from + sync_compare_and_swap<SWI>; rewrite to use split unspecs. + (atomic_compare_and_swap_double<DCASMODE>): Rename from + sync_double_compare_and_swap<DCASMODE>; rewrite to use split unspecs. + (*atomic_compare_and_swap_doubledi_pic): Rename from + sync_double_compare_and_swapdi_pic; rewrite to use split unspecs. + (atomic_fetch_add<SWI>): Rename from sync_old_add<SWI>; add memory + model parameter. + (*atomic_fetch_add_cmp<SWI>): Similarly. + (atomic_add<SWI>, atomic<any_logic><SWI>): Similarly. + (atomic_sub<SWI>): Similarly. Use x86_maybe_negate_const_int. + (sync_lock_test_and_set<SWI>): Merge with ... + (atomic_exchange<SWI>): ... this. 
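The entries above replace the old `expand_val_compare_and_swap`/`expand_bool_compare_and_swap` pair with `expand_atomic_compare_and_swap`, which returns both the success flag and the observed old value, and they add the lock-free query builtins. At the source level this machinery is reached through `__atomic_compare_exchange_n` and `__atomic_always_lock_free`/`__atomic_is_lock_free`. The sketch below is illustrative only and assumes a compiler with this merge applied; note the constraint enforced in builtins.c that the failure memory model may not be `__ATOMIC_RELEASE` or `__ATOMIC_ACQ_REL` and may not be stronger than the success model.

```c
/* Illustrative sketch (not part of the patch) of the source-level interface
   behind expand_atomic_compare_and_swap and the lock-free queries.
   __atomic_compare_exchange_n returns the success flag and, on failure,
   writes the observed value back through EXPECTED, so the usual CAS loop
   needs no explicit reload.  */

#include <stdbool.h>
#include <stdio.h>

static long shared;

static long cas_add (long *obj, long delta)
{
  long expected = __atomic_load_n (obj, __ATOMIC_RELAXED);
  while (!__atomic_compare_exchange_n (obj, &expected, expected + delta,
                                       true /* weak */,
                                       __ATOMIC_ACQ_REL,   /* success */
                                       __ATOMIC_RELAXED))  /* failure */
    ;                     /* EXPECTED now holds the current value; retry.  */
  return expected;        /* value before the update */
}

int main (void)
{
  /* Compile-time and runtime lock-free queries added by this merge.
     A null object pointer means "typically aligned for this size".  */
  bool always = __atomic_always_lock_free (sizeof (long), 0);
  bool is     = __atomic_is_lock_free (sizeof (long), &shared);

  long before = cas_add (&shared, 5);
  printf ("before=%ld after=%ld always=%d is=%d\n",
          before, shared, (int) always, (int) is);
  return 0;
}
```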
+ 2011-11-6 Richard Guenther <rguenther@suse.de> * ipa-prop.c (ipa_modify_call_arguments): Re-compute diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 0ca81ad..aebd098 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3978,7 +3978,7 @@ PREPROCESSOR_DEFINES = \ CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) cppbuiltin.o: cppbuiltin.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ - cppbuiltin.h Makefile + $(TREE_H) cppbuiltin.h Makefile CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES) cppdefault.o: cppdefault.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index 551b222..a6d0127 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -95,6 +95,10 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR, build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_VOLATILE))) +DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR, + build_pointer_type + (build_qualified_type (void_type_node, + TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST))) DEF_PRIMITIVE_TYPE (BT_PTRMODE, (*lang_hooks.types.type_for_mode)(ptr_mode, 0)) DEF_PRIMITIVE_TYPE (BT_INT_PTR, integer_ptr_type_node) DEF_PRIMITIVE_TYPE (BT_FLOAT_PTR, float_ptr_type_node) @@ -315,6 +319,20 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_LONGPTR, BT_BOOL, BT_PTR_LONG, BT_PTR_LONG) DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) +DEF_FUNCTION_TYPE_2 (BT_FN_I1_CONST_VPTR_INT, BT_I1, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I2_CONST_VPTR_INT, BT_I2, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I4_CONST_VPTR_INT, BT_I4, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I8_CONST_VPTR_INT, BT_I8, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I16_CONST_VPTR_INT, BT_I16, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_VOID_VPTR_INT, BT_VOID, BT_VOLATILE_PTR, BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE, + BT_CONST_VOLATILE_PTR) DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR) @@ -383,6 +401,16 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT) DEF_FUNCTION_TYPE_3 (BT_FN_PTR_CONST_PTR_INT_SIZE, BT_PTR, BT_CONST_PTR, BT_INT, BT_SIZE) +DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_INT, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT) DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR, BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR) @@ -402,6 +430,10 @@ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR, BT_VOID, BT_PTR, BT_WORD, BT_WORD, 
BT_PTR) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE, + BT_VOLATILE_PTR, BT_PTR, BT_INT) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_VOID, BT_SIZE, + BT_CONST_VOLATILE_PTR, BT_PTR, BT_INT) DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG, BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING, @@ -409,6 +441,9 @@ DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG, DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR, BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_PTR_LONG, BT_PTR_LONG) +DEF_FUNCTION_TYPE_5 (BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, BT_VOID, BT_SIZE, + BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT) + DEF_FUNCTION_TYPE_6 (BT_FN_INT_STRING_SIZE_INT_SIZE_CONST_STRING_VALIST_ARG, BT_INT, BT_STRING, BT_SIZE, BT_INT, BT_SIZE, @@ -422,6 +457,24 @@ DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG, DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR, BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I1, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I2, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I4, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I8, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I16, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, BT_BOOL, BT_SIZE, + BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT) + DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, diff --git a/gcc/builtins.c b/gcc/builtins.c index bad3165..dc9fe78 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -223,6 +223,7 @@ static tree do_mpfr_bessel_n (tree, tree, tree, const REAL_VALUE_TYPE *, bool); static tree do_mpfr_remquo (tree, tree, tree); static tree do_mpfr_lgamma_r (tree, tree, tree); +static void expand_builtin_sync_synchronize (void); /* Return true if NAME starts with __builtin_ or __sync_. */ @@ -233,6 +234,8 @@ is_builtin_name (const char *name) return true; if (strncmp (name, "__sync_", 7) == 0) return true; + if (strncmp (name, "__atomic_", 9) == 0) + return true; return false; } @@ -5090,21 +5093,41 @@ get_builtin_sync_mem (tree loc, enum machine_mode mode) return mem; } +/* Make sure an argument is in the right mode. + EXP is the tree argument. + MODE is the mode it should be in. */ + +static rtx +expand_expr_force_mode (tree exp, enum machine_mode mode) +{ + rtx val; + enum machine_mode old_mode; + + val = expand_expr (exp, NULL_RTX, mode, EXPAND_NORMAL); + /* If VAL is promoted to a wider mode, convert it back to MODE. Take care + of CONST_INTs, where we know the old_mode only from the call argument. */ + + old_mode = GET_MODE (val); + if (old_mode == VOIDmode) + old_mode = TYPE_MODE (TREE_TYPE (exp)); + val = convert_modes (mode, old_mode, val, 1); + return val; +} + + /* Expand the __sync_xxx_and_fetch and __sync_fetch_and_xxx intrinsics. EXP is the CALL_EXPR. CODE is the rtx code that corresponds to the arithmetic or logical operation from the name; an exception here is that NOT actually means NAND. 
TARGET is an optional place for us to store the results; AFTER is true if this is the - fetch_and_xxx form. IGNORE is true if we don't actually care about - the result of the operation at all. */ + fetch_and_xxx form. */ static rtx expand_builtin_sync_operation (enum machine_mode mode, tree exp, enum rtx_code code, bool after, - rtx target, bool ignore) + rtx target) { rtx val, mem; - enum machine_mode old_mode; location_t loc = EXPR_LOCATION (exp); if (code == NOT && warn_sync_nand) @@ -5151,19 +5174,10 @@ expand_builtin_sync_operation (enum machine_mode mode, tree exp, /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); - val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, mode, EXPAND_NORMAL); - /* If VAL is promoted to a wider mode, convert it back to MODE. Take care - of CONST_INTs, where we know the old_mode only from the call argument. */ - old_mode = GET_MODE (val); - if (old_mode == VOIDmode) - old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1))); - val = convert_modes (mode, old_mode, val, 1); - - if (ignore) - return expand_sync_operation (mem, val, code); - else - return expand_sync_fetch_operation (mem, val, code, after, target); + return expand_atomic_fetch_op (target, mem, val, code, MEMMODEL_SEQ_CST, + after); } /* Expand the __sync_val_compare_and_swap and __sync_bool_compare_and_swap @@ -5176,34 +5190,19 @@ expand_builtin_compare_and_swap (enum machine_mode mode, tree exp, bool is_bool, rtx target) { rtx old_val, new_val, mem; - enum machine_mode old_mode; /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + old_val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); + new_val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode); + if (!expand_atomic_compare_and_swap ((is_bool ? &target : NULL), + (is_bool ? NULL : &target), + mem, old_val, new_val, false, + MEMMODEL_SEQ_CST, MEMMODEL_SEQ_CST)) + return NULL_RTX; - old_val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, - mode, EXPAND_NORMAL); - /* If VAL is promoted to a wider mode, convert it back to MODE. Take care - of CONST_INTs, where we know the old_mode only from the call argument. */ - old_mode = GET_MODE (old_val); - if (old_mode == VOIDmode) - old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1))); - old_val = convert_modes (mode, old_mode, old_val, 1); - - new_val = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, - mode, EXPAND_NORMAL); - /* If VAL is promoted to a wider mode, convert it back to MODE. Take care - of CONST_INTs, where we know the old_mode only from the call argument. */ - old_mode = GET_MODE (new_val); - if (old_mode == VOIDmode) - old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 2))); - new_val = convert_modes (mode, old_mode, new_val, 1); - - if (is_bool) - return expand_bool_compare_and_swap (mem, old_val, new_val, target); - else - return expand_val_compare_and_swap (mem, old_val, new_val, target); + return target; } /* Expand the __sync_lock_test_and_set intrinsic. Note that the most @@ -5214,22 +5213,461 @@ expand_builtin_compare_and_swap (enum machine_mode mode, tree exp, static rtx expand_builtin_sync_lock_test_and_set (enum machine_mode mode, tree exp, - rtx target) + rtx target) { rtx val, mem; - enum machine_mode old_mode; /* Expand the operands. 
*/ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); - val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, mode, EXPAND_NORMAL); - /* If VAL is promoted to a wider mode, convert it back to MODE. Take care - of CONST_INTs, where we know the old_mode only from the call argument. */ - old_mode = GET_MODE (val); - if (old_mode == VOIDmode) - old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1))); - val = convert_modes (mode, old_mode, val, 1); + val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); + + return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE); +} + +/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */ + +static void +expand_builtin_sync_lock_release (enum machine_mode mode, tree exp) +{ + rtx mem; + + /* Expand the operands. */ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + + expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE); +} + +/* Given an integer representing an ``enum memmodel'', verify its + correctness and return the memory model enum. */ + +static enum memmodel +get_memmodel (tree exp) +{ + rtx op; + + /* If the parameter is not a constant, it's a run time value so we'll just + convert it to MEMMODEL_SEQ_CST to avoid annoying runtime checking. */ + if (TREE_CODE (exp) != INTEGER_CST) + return MEMMODEL_SEQ_CST; + + op = expand_normal (exp); + if (INTVAL (op) < 0 || INTVAL (op) >= MEMMODEL_LAST) + { + warning (OPT_Winvalid_memory_model, + "invalid memory model argument to builtin"); + return MEMMODEL_SEQ_CST; + } + return (enum memmodel) INTVAL (op); +} + +/* Expand the __atomic_exchange intrinsic: + TYPE __atomic_exchange (TYPE *object, TYPE desired, enum memmodel) + EXP is the CALL_EXPR. + TARGET is an optional place for us to store the results. */ + +static rtx +expand_builtin_atomic_exchange (enum machine_mode mode, tree exp, rtx target) +{ + rtx val, mem; + enum memmodel model; + + model = get_memmodel (CALL_EXPR_ARG (exp, 2)); + if (model == MEMMODEL_CONSUME) + { + error ("invalid memory model for %<__atomic_exchange%>"); + return NULL_RTX; + } + + if (!flag_inline_atomics) + return NULL_RTX; + + /* Expand the operands. */ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); + + return expand_atomic_exchange (target, mem, val, model); +} + +/* Expand the __atomic_compare_exchange intrinsic: + bool __atomic_compare_exchange (TYPE *object, TYPE *expect, + TYPE desired, BOOL weak, + enum memmodel success, + enum memmodel failure) + EXP is the CALL_EXPR. + TARGET is an optional place for us to store the results. */ + +static rtx +expand_builtin_atomic_compare_exchange (enum machine_mode mode, tree exp, + rtx target) +{ + rtx expect, desired, mem, oldval; + enum memmodel success, failure; + tree weak; + bool is_weak; + + success = get_memmodel (CALL_EXPR_ARG (exp, 4)); + failure = get_memmodel (CALL_EXPR_ARG (exp, 5)); + + if (failure == MEMMODEL_RELEASE || failure == MEMMODEL_ACQ_REL) + { + error ("invalid failure memory model for %<__atomic_compare_exchange%>"); + return NULL_RTX; + } + + if (failure > success) + { + error ("failure memory model cannot be stronger than success " + "memory model for %<__atomic_compare_exchange%>"); + return NULL_RTX; + } + + if (!flag_inline_atomics) + return NULL_RTX; + + /* Expand the operands. 
*/ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + + expect = expand_normal (CALL_EXPR_ARG (exp, 1)); + expect = convert_memory_address (Pmode, expect); + desired = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode); + + weak = CALL_EXPR_ARG (exp, 3); + is_weak = false; + if (host_integerp (weak, 0) && tree_low_cst (weak, 0) != 0) + is_weak = true; + + oldval = copy_to_reg (gen_rtx_MEM (mode, expect)); + + if (!expand_atomic_compare_and_swap (&target, &oldval, mem, oldval, + desired, is_weak, success, failure)) + return NULL_RTX; + + emit_move_insn (gen_rtx_MEM (mode, expect), oldval); + return target; +} + +/* Expand the __atomic_load intrinsic: + TYPE __atomic_load (TYPE *object, enum memmodel) + EXP is the CALL_EXPR. + TARGET is an optional place for us to store the results. */ + +static rtx +expand_builtin_atomic_load (enum machine_mode mode, tree exp, rtx target) +{ + rtx mem; + enum memmodel model; + + model = get_memmodel (CALL_EXPR_ARG (exp, 1)); + if (model == MEMMODEL_RELEASE + || model == MEMMODEL_ACQ_REL) + { + error ("invalid memory model for %<__atomic_load%>"); + return NULL_RTX; + } + + if (!flag_inline_atomics) + return NULL_RTX; + + /* Expand the operand. */ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + + return expand_atomic_load (target, mem, model); +} + + +/* Expand the __atomic_store intrinsic: + void __atomic_store (TYPE *object, TYPE desired, enum memmodel) + EXP is the CALL_EXPR. + TARGET is an optional place for us to store the results. */ + +static rtx +expand_builtin_atomic_store (enum machine_mode mode, tree exp) +{ + rtx mem, val; + enum memmodel model; + + model = get_memmodel (CALL_EXPR_ARG (exp, 2)); + if (model != MEMMODEL_RELAXED + && model != MEMMODEL_SEQ_CST + && model != MEMMODEL_RELEASE) + { + error ("invalid memory model for %<__atomic_store%>"); + return NULL_RTX; + } + + if (!flag_inline_atomics) + return NULL_RTX; + + /* Expand the operands. */ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); + + return expand_atomic_store (mem, val, model); +} + +/* Expand the __atomic_fetch_XXX intrinsic: + TYPE __atomic_fetch_XXX (TYPE *object, TYPE val, enum memmodel) + EXP is the CALL_EXPR. + TARGET is an optional place for us to store the results. + CODE is the operation, PLUS, MINUS, ADD, XOR, or IOR. + FETCH_AFTER is true if returning the result of the operation. + FETCH_AFTER is false if returning the value before the operation. + IGNORE is true if the result is not used. + EXT_CALL is the correct builtin for an external call if this cannot be + resolved to an instruction sequence. */ + +static rtx +expand_builtin_atomic_fetch_op (enum machine_mode mode, tree exp, rtx target, + enum rtx_code code, bool fetch_after, + bool ignore, enum built_in_function ext_call) +{ + rtx val, mem, ret; + enum memmodel model; + tree fndecl; + tree addr; + + model = get_memmodel (CALL_EXPR_ARG (exp, 2)); + + /* Expand the operands. */ + mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); + val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); + + /* Only try generating instructions if inlining is turned on. */ + if (flag_inline_atomics) + { + ret = expand_atomic_fetch_op (target, mem, val, code, model, fetch_after); + if (ret) + return ret; + } + + /* Return if a different routine isn't needed for the library call. */ + if (ext_call == BUILT_IN_NONE) + return NULL_RTX; + + /* Change the call to the specified function. 
*/ + fndecl = get_callee_fndecl (exp); + addr = CALL_EXPR_FN (exp); + STRIP_NOPS (addr); + + gcc_assert (TREE_OPERAND (addr, 0) == fndecl); + TREE_OPERAND (addr, 0) = builtin_decl_explicit(ext_call); + + /* Expand the call here so we can emit trailing code. */ + ret = expand_call (exp, target, ignore); + + /* Replace the original function just in case it matters. */ + TREE_OPERAND (addr, 0) = fndecl; + + /* Then issue the arithmetic correction to return the right result. */ + if (!ignore) + ret = expand_simple_binop (mode, code, ret, val, NULL_RTX, true, + OPTAB_LIB_WIDEN); + return ret; +} + +/* Return true if (optional) argument ARG1 of size ARG0 is always lock free on + this architecture. If ARG1 is NULL, use typical alignment for size ARG0. */ + +static tree +fold_builtin_atomic_always_lock_free (tree arg0, tree arg1) +{ + int size; + enum machine_mode mode; + unsigned int mode_align, type_align; + + if (TREE_CODE (arg0) != INTEGER_CST) + return NULL_TREE; + + size = INTVAL (expand_normal (arg0)) * BITS_PER_UNIT; + mode = mode_for_size (size, MODE_INT, 0); + mode_align = GET_MODE_ALIGNMENT (mode); + + if (TREE_CODE (arg1) == INTEGER_CST && INTVAL (expand_normal (arg1)) == 0) + type_align = mode_align; + else + { + tree ttype = TREE_TYPE (arg1); + + /* This function is usually invoked and folded immediately by the front + end before anything else has a chance to look at it. The pointer + parameter at this point is usually cast to a void *, so check for that + and look past the cast. */ + if (TREE_CODE (arg1) == NOP_EXPR && POINTER_TYPE_P (ttype) + && VOID_TYPE_P (TREE_TYPE (ttype))) + arg1 = TREE_OPERAND (arg1, 0); + + ttype = TREE_TYPE (arg1); + gcc_assert (POINTER_TYPE_P (ttype)); + + /* Get the underlying type of the object. */ + ttype = TREE_TYPE (ttype); + type_align = TYPE_ALIGN (ttype); + } + + /* If the object has smaller alignment, the the lock free routines cannot + be used. */ + if (type_align < mode_align) + return integer_zero_node; + + /* Check if a compare_and_swap pattern exists for the mode which represents + the required size. The pattern is not allowed to fail, so the existence + of the pattern indicates support is present. */ + if (can_compare_and_swap_p (mode)) + return integer_one_node; + else + return integer_zero_node; +} + +/* Return true if the parameters to call EXP represent an object which will + always generate lock free instructions. The first argument represents the + size of the object, and the second parameter is a pointer to the object + itself. If NULL is passed for the object, then the result is based on + typical alignment for an object of the specified size. Otherwise return + false. */ + +static rtx +expand_builtin_atomic_always_lock_free (tree exp) +{ + tree size; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); - return expand_sync_lock_test_and_set (mem, val, target); + if (TREE_CODE (arg0) != INTEGER_CST) + { + error ("non-constant argument 1 to __atomic_always_lock_free"); + return const0_rtx; + } + + size = fold_builtin_atomic_always_lock_free (arg0, arg1); + if (size == integer_one_node) + return const1_rtx; + return const0_rtx; +} + +/* Return a one or zero if it can be determined that object ARG1 of size ARG + is lock free on this architecture. */ + +static tree +fold_builtin_atomic_is_lock_free (tree arg0, tree arg1) +{ + if (!flag_inline_atomics) + return NULL_TREE; + + /* If it isn't always lock free, don't generate a result. 
*/ + if (fold_builtin_atomic_always_lock_free (arg0, arg1) == integer_one_node) + return integer_one_node; + + return NULL_TREE; +} + +/* Return true if the parameters to call EXP represent an object which will + always generate lock free instructions. The first argument represents the + size of the object, and the second parameter is a pointer to the object + itself. If NULL is passed for the object, then the result is based on + typical alignment for an object of the specified size. Otherwise return + NULL*/ + +static rtx +expand_builtin_atomic_is_lock_free (tree exp) +{ + tree size; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))) + { + error ("non-integer argument 1 to __atomic_is_lock_free"); + return NULL_RTX; + } + + if (!flag_inline_atomics) + return NULL_RTX; + + /* If the value is known at compile time, return the RTX for it. */ + size = fold_builtin_atomic_is_lock_free (arg0, arg1); + if (size == integer_one_node) + return const1_rtx; + + return NULL_RTX; +} + +/* This routine will either emit the mem_thread_fence pattern or issue a + sync_synchronize to generate a fence for memory model MEMMODEL. */ + +#ifndef HAVE_mem_thread_fence +# define HAVE_mem_thread_fence 0 +# define gen_mem_thread_fence(x) (gcc_unreachable (), NULL_RTX) +#endif + +void +expand_builtin_mem_thread_fence (enum memmodel model) +{ + if (HAVE_mem_thread_fence) + emit_insn (gen_mem_thread_fence (GEN_INT (model))); + else if (model != MEMMODEL_RELAXED) + expand_builtin_sync_synchronize (); +} + +/* Expand the __atomic_thread_fence intrinsic: + void __atomic_thread_fence (enum memmodel) + EXP is the CALL_EXPR. */ + +static void +expand_builtin_atomic_thread_fence (tree exp) +{ + enum memmodel model; + + model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + expand_builtin_mem_thread_fence (model); +} + +/* This routine will either emit the mem_signal_fence pattern or issue a + sync_synchronize to generate a fence for memory model MEMMODEL. */ + +#ifndef HAVE_mem_signal_fence +# define HAVE_mem_signal_fence 0 +# define gen_mem_signal_fence(x) (gcc_unreachable (), NULL_RTX) +#endif + +static void +expand_builtin_mem_signal_fence (enum memmodel model) +{ + if (HAVE_mem_signal_fence) + emit_insn (gen_mem_signal_fence (GEN_INT (model))); + else if (model != MEMMODEL_RELAXED) + { + rtx asm_op, clob; + + /* By default targets are coherent between a thread and the signal + handler running on the same thread. Thus this really becomes a + compiler barrier, in that stores must not be sunk past + (or raised above) a given point. */ + + /* Generate asm volatile("" : : : "memory") as the memory barrier. */ + asm_op = gen_rtx_ASM_OPERANDS (VOIDmode, empty_string, empty_string, 0, + rtvec_alloc (0), rtvec_alloc (0), + rtvec_alloc (0), UNKNOWN_LOCATION); + MEM_VOLATILE_P (asm_op) = 1; + + clob = gen_rtx_SCRATCH (VOIDmode); + clob = gen_rtx_MEM (BLKmode, clob); + clob = gen_rtx_CLOBBER (VOIDmode, clob); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob))); + } +} + +/* Expand the __atomic_signal_fence intrinsic: + void __atomic_signal_fence (enum memmodel) + EXP is the CALL_EXPR. */ + +static void +expand_builtin_atomic_signal_fence (tree exp) +{ + enum memmodel model; + + model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + expand_builtin_mem_signal_fence (model); } /* Expand the __sync_synchronize intrinsic. */ @@ -5264,33 +5702,6 @@ expand_builtin_sync_synchronize (void) expand_asm_stmt (x); } -/* Expand the __sync_lock_release intrinsic. 
EXP is the CALL_EXPR. */ - -static void -expand_builtin_sync_lock_release (enum machine_mode mode, tree exp) -{ - struct expand_operand ops[2]; - enum insn_code icode; - rtx mem; - - /* Expand the operands. */ - mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); - - /* If there is an explicit operation in the md file, use it. */ - icode = direct_optab_handler (sync_lock_release_optab, mode); - if (icode != CODE_FOR_nothing) - { - create_fixed_operand (&ops[0], mem); - create_input_operand (&ops[1], const0_rtx, mode); - if (maybe_expand_insn (icode, 2, ops)) - return; - } - - /* Otherwise we can implement this operation by emitting a barrier - followed by a store of zero. */ - expand_builtin_sync_synchronize (); - emit_move_insn (mem, const0_rtx); -} /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient @@ -5891,8 +6302,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_FETCH_AND_ADD_8: case BUILT_IN_SYNC_FETCH_AND_ADD_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_ADD_1); - target = expand_builtin_sync_operation (mode, exp, PLUS, - false, target, ignore); + target = expand_builtin_sync_operation (mode, exp, PLUS, false, target); if (target) return target; break; @@ -5903,8 +6313,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_FETCH_AND_SUB_8: case BUILT_IN_SYNC_FETCH_AND_SUB_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_SUB_1); - target = expand_builtin_sync_operation (mode, exp, MINUS, - false, target, ignore); + target = expand_builtin_sync_operation (mode, exp, MINUS, false, target); if (target) return target; break; @@ -5915,8 +6324,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_FETCH_AND_OR_8: case BUILT_IN_SYNC_FETCH_AND_OR_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_OR_1); - target = expand_builtin_sync_operation (mode, exp, IOR, - false, target, ignore); + target = expand_builtin_sync_operation (mode, exp, IOR, false, target); if (target) return target; break; @@ -5927,8 +6335,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_FETCH_AND_AND_8: case BUILT_IN_SYNC_FETCH_AND_AND_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_AND_1); - target = expand_builtin_sync_operation (mode, exp, AND, - false, target, ignore); + target = expand_builtin_sync_operation (mode, exp, AND, false, target); if (target) return target; break; @@ -5939,8 +6346,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_FETCH_AND_XOR_8: case BUILT_IN_SYNC_FETCH_AND_XOR_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_XOR_1); - target = expand_builtin_sync_operation (mode, exp, XOR, - false, target, ignore); + target = expand_builtin_sync_operation (mode, exp, XOR, false, target); if (target) return target; break; @@ -5951,8 +6357,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_FETCH_AND_NAND_8: case BUILT_IN_SYNC_FETCH_AND_NAND_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_FETCH_AND_NAND_1); - target = expand_builtin_sync_operation (mode, exp, NOT, - false, target, ignore); + target = expand_builtin_sync_operation (mode, exp, NOT, false, target); if (target) return target; break; @@ -5963,8 +6368,7 @@ expand_builtin (tree exp, rtx target, rtx 
subtarget, enum machine_mode mode, case BUILT_IN_SYNC_ADD_AND_FETCH_8: case BUILT_IN_SYNC_ADD_AND_FETCH_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_ADD_AND_FETCH_1); - target = expand_builtin_sync_operation (mode, exp, PLUS, - true, target, ignore); + target = expand_builtin_sync_operation (mode, exp, PLUS, true, target); if (target) return target; break; @@ -5975,8 +6379,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_SUB_AND_FETCH_8: case BUILT_IN_SYNC_SUB_AND_FETCH_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_SUB_AND_FETCH_1); - target = expand_builtin_sync_operation (mode, exp, MINUS, - true, target, ignore); + target = expand_builtin_sync_operation (mode, exp, MINUS, true, target); if (target) return target; break; @@ -5987,8 +6390,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_OR_AND_FETCH_8: case BUILT_IN_SYNC_OR_AND_FETCH_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_OR_AND_FETCH_1); - target = expand_builtin_sync_operation (mode, exp, IOR, - true, target, ignore); + target = expand_builtin_sync_operation (mode, exp, IOR, true, target); if (target) return target; break; @@ -5999,8 +6401,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_AND_AND_FETCH_8: case BUILT_IN_SYNC_AND_AND_FETCH_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_AND_AND_FETCH_1); - target = expand_builtin_sync_operation (mode, exp, AND, - true, target, ignore); + target = expand_builtin_sync_operation (mode, exp, AND, true, target); if (target) return target; break; @@ -6011,8 +6412,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_XOR_AND_FETCH_8: case BUILT_IN_SYNC_XOR_AND_FETCH_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_XOR_AND_FETCH_1); - target = expand_builtin_sync_operation (mode, exp, XOR, - true, target, ignore); + target = expand_builtin_sync_operation (mode, exp, XOR, true, target); if (target) return target; break; @@ -6023,8 +6423,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, case BUILT_IN_SYNC_NAND_AND_FETCH_8: case BUILT_IN_SYNC_NAND_AND_FETCH_16: mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_NAND_AND_FETCH_1); - target = expand_builtin_sync_operation (mode, exp, NOT, - true, target, ignore); + target = expand_builtin_sync_operation (mode, exp, NOT, true, target); if (target) return target; break; @@ -6082,6 +6481,236 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, expand_builtin_sync_synchronize (); return const0_rtx; + case BUILT_IN_ATOMIC_EXCHANGE_1: + case BUILT_IN_ATOMIC_EXCHANGE_2: + case BUILT_IN_ATOMIC_EXCHANGE_4: + case BUILT_IN_ATOMIC_EXCHANGE_8: + case BUILT_IN_ATOMIC_EXCHANGE_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_EXCHANGE_1); + target = expand_builtin_atomic_exchange (mode, exp, target); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1: + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2: + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4: + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8: + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16: + mode = + get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1); + target = expand_builtin_atomic_compare_exchange (mode, exp, target); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_LOAD_1: + case BUILT_IN_ATOMIC_LOAD_2: + case BUILT_IN_ATOMIC_LOAD_4: + case 
BUILT_IN_ATOMIC_LOAD_8: + case BUILT_IN_ATOMIC_LOAD_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_LOAD_1); + target = expand_builtin_atomic_load (mode, exp, target); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_STORE_1: + case BUILT_IN_ATOMIC_STORE_2: + case BUILT_IN_ATOMIC_STORE_4: + case BUILT_IN_ATOMIC_STORE_8: + case BUILT_IN_ATOMIC_STORE_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_STORE_1); + target = expand_builtin_atomic_store (mode, exp); + if (target) + return const0_rtx; + break; + + case BUILT_IN_ATOMIC_ADD_FETCH_1: + case BUILT_IN_ATOMIC_ADD_FETCH_2: + case BUILT_IN_ATOMIC_ADD_FETCH_4: + case BUILT_IN_ATOMIC_ADD_FETCH_8: + case BUILT_IN_ATOMIC_ADD_FETCH_16: + { + enum built_in_function lib; + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1); + lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_ADD_1 + + (fcode - BUILT_IN_ATOMIC_ADD_FETCH_1)); + target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, true, + ignore, lib); + if (target) + return target; + break; + } + case BUILT_IN_ATOMIC_SUB_FETCH_1: + case BUILT_IN_ATOMIC_SUB_FETCH_2: + case BUILT_IN_ATOMIC_SUB_FETCH_4: + case BUILT_IN_ATOMIC_SUB_FETCH_8: + case BUILT_IN_ATOMIC_SUB_FETCH_16: + { + enum built_in_function lib; + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1); + lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_SUB_1 + + (fcode - BUILT_IN_ATOMIC_SUB_FETCH_1)); + target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, true, + ignore, lib); + if (target) + return target; + break; + } + case BUILT_IN_ATOMIC_AND_FETCH_1: + case BUILT_IN_ATOMIC_AND_FETCH_2: + case BUILT_IN_ATOMIC_AND_FETCH_4: + case BUILT_IN_ATOMIC_AND_FETCH_8: + case BUILT_IN_ATOMIC_AND_FETCH_16: + { + enum built_in_function lib; + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_AND_FETCH_1); + lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_AND_1 + + (fcode - BUILT_IN_ATOMIC_AND_FETCH_1)); + target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, true, + ignore, lib); + if (target) + return target; + break; + } + case BUILT_IN_ATOMIC_NAND_FETCH_1: + case BUILT_IN_ATOMIC_NAND_FETCH_2: + case BUILT_IN_ATOMIC_NAND_FETCH_4: + case BUILT_IN_ATOMIC_NAND_FETCH_8: + case BUILT_IN_ATOMIC_NAND_FETCH_16: + { + enum built_in_function lib; + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1); + lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_NAND_1 + + (fcode - BUILT_IN_ATOMIC_NAND_FETCH_1)); + target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, true, + ignore, lib); + if (target) + return target; + break; + } + case BUILT_IN_ATOMIC_XOR_FETCH_1: + case BUILT_IN_ATOMIC_XOR_FETCH_2: + case BUILT_IN_ATOMIC_XOR_FETCH_4: + case BUILT_IN_ATOMIC_XOR_FETCH_8: + case BUILT_IN_ATOMIC_XOR_FETCH_16: + { + enum built_in_function lib; + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1); + lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_XOR_1 + + (fcode - BUILT_IN_ATOMIC_XOR_FETCH_1)); + target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, true, + ignore, lib); + if (target) + return target; + break; + } + case BUILT_IN_ATOMIC_OR_FETCH_1: + case BUILT_IN_ATOMIC_OR_FETCH_2: + case BUILT_IN_ATOMIC_OR_FETCH_4: + case BUILT_IN_ATOMIC_OR_FETCH_8: + case BUILT_IN_ATOMIC_OR_FETCH_16: + { + enum built_in_function lib; + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_OR_FETCH_1); + lib = (enum built_in_function)((int)BUILT_IN_ATOMIC_FETCH_OR_1 + + (fcode - 
BUILT_IN_ATOMIC_OR_FETCH_1)); + target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, true, + ignore, lib); + if (target) + return target; + break; + } + case BUILT_IN_ATOMIC_FETCH_ADD_1: + case BUILT_IN_ATOMIC_FETCH_ADD_2: + case BUILT_IN_ATOMIC_FETCH_ADD_4: + case BUILT_IN_ATOMIC_FETCH_ADD_8: + case BUILT_IN_ATOMIC_FETCH_ADD_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_ADD_1); + target = expand_builtin_atomic_fetch_op (mode, exp, target, PLUS, false, + ignore, BUILT_IN_NONE); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_FETCH_SUB_1: + case BUILT_IN_ATOMIC_FETCH_SUB_2: + case BUILT_IN_ATOMIC_FETCH_SUB_4: + case BUILT_IN_ATOMIC_FETCH_SUB_8: + case BUILT_IN_ATOMIC_FETCH_SUB_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_SUB_1); + target = expand_builtin_atomic_fetch_op (mode, exp, target, MINUS, false, + ignore, BUILT_IN_NONE); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_FETCH_AND_1: + case BUILT_IN_ATOMIC_FETCH_AND_2: + case BUILT_IN_ATOMIC_FETCH_AND_4: + case BUILT_IN_ATOMIC_FETCH_AND_8: + case BUILT_IN_ATOMIC_FETCH_AND_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_AND_1); + target = expand_builtin_atomic_fetch_op (mode, exp, target, AND, false, + ignore, BUILT_IN_NONE); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_FETCH_NAND_1: + case BUILT_IN_ATOMIC_FETCH_NAND_2: + case BUILT_IN_ATOMIC_FETCH_NAND_4: + case BUILT_IN_ATOMIC_FETCH_NAND_8: + case BUILT_IN_ATOMIC_FETCH_NAND_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_NAND_1); + target = expand_builtin_atomic_fetch_op (mode, exp, target, NOT, false, + ignore, BUILT_IN_NONE); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_FETCH_XOR_1: + case BUILT_IN_ATOMIC_FETCH_XOR_2: + case BUILT_IN_ATOMIC_FETCH_XOR_4: + case BUILT_IN_ATOMIC_FETCH_XOR_8: + case BUILT_IN_ATOMIC_FETCH_XOR_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_XOR_1); + target = expand_builtin_atomic_fetch_op (mode, exp, target, XOR, false, + ignore, BUILT_IN_NONE); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_FETCH_OR_1: + case BUILT_IN_ATOMIC_FETCH_OR_2: + case BUILT_IN_ATOMIC_FETCH_OR_4: + case BUILT_IN_ATOMIC_FETCH_OR_8: + case BUILT_IN_ATOMIC_FETCH_OR_16: + mode = get_builtin_sync_mode (fcode - BUILT_IN_ATOMIC_FETCH_OR_1); + target = expand_builtin_atomic_fetch_op (mode, exp, target, IOR, false, + ignore, BUILT_IN_NONE); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE: + return expand_builtin_atomic_always_lock_free (exp); + + case BUILT_IN_ATOMIC_IS_LOCK_FREE: + target = expand_builtin_atomic_is_lock_free (exp); + if (target) + return target; + break; + + case BUILT_IN_ATOMIC_THREAD_FENCE: + expand_builtin_atomic_thread_fence (exp); + return const0_rtx; + + case BUILT_IN_ATOMIC_SIGNAL_FENCE: + expand_builtin_atomic_signal_fence (exp); + return const0_rtx; + case BUILT_IN_OBJECT_SIZE: return expand_builtin_object_size (exp); @@ -10121,6 +10750,12 @@ fold_builtin_2 (location_t loc, tree fndecl, tree arg0, tree arg1, bool ignore) return fold_builtin_fprintf (loc, fndecl, arg0, arg1, NULL_TREE, ignore, fcode); + case BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE: + return fold_builtin_atomic_always_lock_free (arg0, arg1); + + case BUILT_IN_ATOMIC_IS_LOCK_FREE: + return fold_builtin_atomic_is_lock_free (arg0, arg1); + default: break; } diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index d7ff089..1c8a6b4 100644 --- a/gcc/c-family/ChangeLog 
+++ b/gcc/c-family/ChangeLog @@ -1,3 +1,24 @@ +2011-11-06 Andrew MacLeod <amacleod@redhat.com> + Richard Henderson <rth@redhat.com> + + Merged from cxx-mem-model. + + * c-cppbuiltin.c (c_cpp_builtins): Test both atomic and sync patterns. + * c-common.c (sync_resolve_params, sync_resolve_return): Only tweak + parameters that are the same type size. + (get_atomic_generic_size): New. Find size of generic + atomic function parameters and do typechecking. + (add_atomic_size_parameter): New. Insert size into parameter list. + (resolve_overloaded_atomic_exchange): Restructure __atomic_exchange to + either __atomic_exchange_n or external library call. + (resolve_overloaded_atomic_compare_exchange): Restructure + __atomic_compare_exchange to either _n variant or external library call. + (resolve_overloaded_atomic_load): Restructure __atomic_load to either + __atomic_load_n or an external library call. + (resolve_overloaded_atomic_store): Restructure __atomic_store to either + __atomic_store_n or an external library call. + (resolve_overloaded_builtin): Handle new __atomic builtins. + 2011-11-04 Eric Botcazou <ebotcazou@adacore.com> PR c++/50608 diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index df8dda4..aa5f3bf 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -9007,7 +9007,8 @@ sync_resolve_size (tree function, VEC(tree,gc) *params) was encountered; true on success. */ static bool -sync_resolve_params (tree orig_function, tree function, VEC(tree, gc) *params) +sync_resolve_params (location_t loc, tree orig_function, tree function, + VEC(tree, gc) *params, bool orig_format) { function_args_iterator iter; tree ptype; @@ -9035,21 +9036,34 @@ sync_resolve_params (tree orig_function, tree function, VEC(tree, gc) *params) ++parmnum; if (VEC_length (tree, params) <= parmnum) { - error ("too few arguments to function %qE", orig_function); + error_at (loc, "too few arguments to function %qE", orig_function); return false; } - /* ??? Ideally for the first conversion we'd use convert_for_assignment - so that we get warnings for anything that doesn't match the pointer - type. This isn't portable across the C and C++ front ends atm. */ - val = VEC_index (tree, params, parmnum); - val = convert (ptype, val); - val = convert (arg_type, val); - VEC_replace (tree, params, parmnum, val); + /* Only convert parameters if the size is appropriate with new format + sync routines. */ + if (orig_format + || tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (arg_type))) + { + /* Ideally for the first conversion we'd use convert_for_assignment + so that we get warnings for anything that doesn't match the pointer + type. This isn't portable across the C and C++ front ends atm. */ + val = VEC_index (tree, params, parmnum); + val = convert (ptype, val); + val = convert (arg_type, val); + VEC_replace (tree, params, parmnum, val); + } function_args_iter_next (&iter); } + /* __atomic routines are not variadic. */ + if (!orig_format && VEC_length (tree, params) != parmnum + 1) + { + error_at (loc, "too many arguments to function %qE", orig_function); + return false; + } + /* The definition of these primitives is variadic, with the remaining being "an optional list of variables protected by the memory barrier". No clue what that's supposed to mean, precisely, but we consider all @@ -9064,13 +9078,388 @@ sync_resolve_params (tree orig_function, tree function, VEC(tree, gc) *params) PARAMS. 
*/ static tree -sync_resolve_return (tree first_param, tree result) +sync_resolve_return (tree first_param, tree result, bool orig_format) { tree ptype = TREE_TYPE (TREE_TYPE (first_param)); + tree rtype = TREE_TYPE (result); ptype = TYPE_MAIN_VARIANT (ptype); - return convert (ptype, result); + + /* New format doesn't require casting unless the types are the same size. */ + if (orig_format || tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (rtype))) + return convert (ptype, result); + else + return result; +} + +/* This function verifies the PARAMS to generic atomic FUNCTION. + It returns the size if all the parameters are the same size, otherwise + 0 is returned if the parameters are invalid. */ + +static int +get_atomic_generic_size (location_t loc, tree function, VEC(tree,gc) *params) +{ + unsigned int n_param; + unsigned int n_model; + unsigned int x; + int size_0; + tree type_0; + + /* Determine the parameter makeup. */ + switch (DECL_FUNCTION_CODE (function)) + { + case BUILT_IN_ATOMIC_EXCHANGE: + n_param = 4; + n_model = 1; + break; + case BUILT_IN_ATOMIC_LOAD: + case BUILT_IN_ATOMIC_STORE: + n_param = 3; + n_model = 1; + break; + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE: + n_param = 6; + n_model = 2; + break; + default: + return 0; + } + + if (VEC_length (tree, params) != n_param) + { + error_at (loc, "incorrect number of arguments to function %qE", function); + return 0; + } + + /* Get type of first parameter, and determine its size. */ + type_0 = TREE_TYPE (VEC_index (tree, params, 0)); + if (TREE_CODE (type_0) != POINTER_TYPE) + { + error_at (loc, "argument 1 of %qE must be a pointer type", function); + return 0; + } + size_0 = tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type_0)), 1); + + /* Check each other parameter is a pointer and the same size. */ + for (x = 0; x < n_param - n_model; x++) + { + int size; + tree type = TREE_TYPE (VEC_index (tree, params, x)); + /* __atomic_compare_exchange has a bool in the 4th postion, skip it. */ + if (n_param == 6 && x == 3) + continue; + if (!POINTER_TYPE_P (type)) + { + error_at (loc, "argument %d of %qE must be a pointer type", x + 1, + function); + return 0; + } + size = tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); + if (size != size_0) + { + error_at (loc, "size mismatch in argument %d of %qE", x + 1, + function); + return 0; + } + } + + /* Check memory model parameters for validity. */ + for (x = n_param - n_model ; x < n_param; x++) + { + tree p = VEC_index (tree, params, x); + if (TREE_CODE (p) == INTEGER_CST) + { + int i = tree_low_cst (p, 1); + if (i < 0 || i >= MEMMODEL_LAST) + { + warning_at (loc, OPT_Winvalid_memory_model, + "invalid memory model argument %d of %qE", x + 1, + function); + return MEMMODEL_SEQ_CST; + } + } + else + if (!INTEGRAL_TYPE_P (TREE_TYPE (p))) + { + error_at (loc, "non-integer memory model argument %d of %qE", x + 1, + function); + return 0; + } + } + + return size_0; +} + + +/* This will take an __atomic_ generic FUNCTION call, and add a size parameter N + at the beginning of the parameter list PARAMS representing the size of the + objects. This is to match the library ABI requirement. LOC is the location + of the function call. + The new function is returned if it needed rebuilding, otherwise NULL_TREE is + returned to allow the external call to be constructed. */ + +static tree +add_atomic_size_parameter (unsigned n, location_t loc, tree function, + VEC(tree,gc) *params) +{ + tree size_node; + + /* Insert a SIZE_T parameter as the first param. 
If there isn't + enough space, allocate a new vector and recursively re-build with that. */ + if (!VEC_space (tree, params, 1)) + { + unsigned int z, len; + VEC(tree,gc) *vec; + tree f; + + len = VEC_length (tree, params); + vec = VEC_alloc (tree, gc, len + 1); + for (z = 0; z < len; z++) + VEC_quick_push (tree, vec, VEC_index (tree, params, z)); + f = build_function_call_vec (loc, function, vec, NULL); + VEC_free (tree, gc, vec); + return f; + } + + /* Add the size parameter and leave as a function call for processing. */ + size_node = build_int_cst (size_type_node, n); + VEC_quick_insert (tree, params, 0, size_node); + return NULL_TREE; +} + + +/* This will process an __atomic_exchange function call, determine whether it + needs to be mapped to the _N variation, or turned into a library call. + LOC is the location of the builtin call. + FUNCTION is the DECL that has been invoked; + PARAMS is the argument list for the call. The return value is non-null + TRUE is returned if it is translated into the proper format for a call to the + external library, and NEW_RETURN is set the tree for that function. + FALSE is returned if processing for the _N variation is required, and + NEW_RETURN is set to the the return value the result is copied into. */ +static bool +resolve_overloaded_atomic_exchange (location_t loc, tree function, + VEC(tree,gc) *params, tree *new_return) +{ + tree p0, p1, p2, p3; + tree I_type, I_type_ptr; + int n = get_atomic_generic_size (loc, function, params); + + /* If not a lock-free size, change to the library generic format. */ + if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16) + { + *new_return = add_atomic_size_parameter (n, loc, function, params); + return true; + } + + /* Otherwise there is a lockfree match, transform the call from: + void fn(T* mem, T* desired, T* return, model) + into + *return = (T) (fn (In* mem, (In) *desired, model)) */ + + p0 = VEC_index (tree, params, 0); + p1 = VEC_index (tree, params, 1); + p2 = VEC_index (tree, params, 2); + p3 = VEC_index (tree, params, 3); + + /* Create pointer to appropriate size. */ + I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1); + I_type_ptr = build_pointer_type (I_type); + + /* Convert object pointer to required type. */ + p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0); + VEC_replace (tree, params, 0, p0); + + /* Convert new value to required type, and dereference it. */ + p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR); + p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1); + VEC_replace (tree, params, 1, p1); + + /* Move memory model to the 3rd position, and end param list. */ + VEC_replace (tree, params, 2, p3); + VEC_truncate (tree, params, 3); + + /* Convert return pointer and dereference it for later assignment. */ + *new_return = build_indirect_ref (loc, p2, RO_UNARY_STAR); + + return false; } + +/* This will process an __atomic_compare_exchange function call, determine + whether it needs to be mapped to the _N variation, or turned into a lib call. + LOC is the location of the builtin call. + FUNCTION is the DECL that has been invoked; + PARAMS is the argument list for the call. The return value is non-null + TRUE is returned if it is translated into the proper format for a call to the + external library, and NEW_RETURN is set the tree for that function. + FALSE is returned if processing for the _N variation is required. 
*/ + +static bool +resolve_overloaded_atomic_compare_exchange (location_t loc, tree function, + VEC(tree,gc) *params, + tree *new_return) +{ + tree p0, p1, p2; + tree I_type, I_type_ptr; + int n = get_atomic_generic_size (loc, function, params); + + /* If not a lock-free size, change to the library generic format. */ + if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16) + { + /* The library generic format does not have the weak parameter, so + remove it from the param list. Since a parameter has been removed, + we can be sure that there is room for the SIZE_T parameter, meaning + there will not be a recursive rebuilding of the parameter list, so + there is no danger this will be done twice. */ + if (n > 0) + { + VEC_replace (tree, params, 3, VEC_index (tree, params, 4)); + VEC_replace (tree, params, 4, VEC_index (tree, params, 5)); + VEC_truncate (tree, params, 5); + } + *new_return = add_atomic_size_parameter (n, loc, function, params); + return true; + } + + /* Otherwise, there is a match, so the call needs to be transformed from: + bool fn(T* mem, T* desired, T* return, weak, success, failure) + into + bool fn ((In *)mem, (In *)expected, (In) *desired, weak, succ, fail) */ + + p0 = VEC_index (tree, params, 0); + p1 = VEC_index (tree, params, 1); + p2 = VEC_index (tree, params, 2); + + /* Create pointer to appropriate size. */ + I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1); + I_type_ptr = build_pointer_type (I_type); + + /* Convert object pointer to required type. */ + p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0); + VEC_replace (tree, params, 0, p0); + + /* Convert expected pointer to required type. */ + p1 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p1); + VEC_replace (tree, params, 1, p1); + + /* Convert desired value to required type, and dereference it. */ + p2 = build_indirect_ref (loc, p2, RO_UNARY_STAR); + p2 = build1 (VIEW_CONVERT_EXPR, I_type, p2); + VEC_replace (tree, params, 2, p2); + + /* The rest of the parameters are fine. NULL means no special return value + processing.*/ + *new_return = NULL; + return false; +} + + +/* This will process an __atomic_load function call, determine whether it + needs to be mapped to the _N variation, or turned into a library call. + LOC is the location of the builtin call. + FUNCTION is the DECL that has been invoked; + PARAMS is the argument list for the call. The return value is non-null + TRUE is returned if it is translated into the proper format for a call to the + external library, and NEW_RETURN is set the tree for that function. + FALSE is returned if processing for the _N variation is required, and + NEW_RETURN is set to the the return value the result is copied into. */ + +static bool +resolve_overloaded_atomic_load (location_t loc, tree function, + VEC(tree,gc) *params, tree *new_return) +{ + tree p0, p1, p2; + tree I_type, I_type_ptr; + int n = get_atomic_generic_size (loc, function, params); + + /* If not a lock-free size, change to the library generic format. */ + if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16) + { + *new_return = add_atomic_size_parameter (n, loc, function, params); + return true; + } + + /* Otherwise, there is a match, so the call needs to be transformed from: + void fn(T* mem, T* return, model) + into + *return = (T) (fn ((In *) mem, model)) */ + + p0 = VEC_index (tree, params, 0); + p1 = VEC_index (tree, params, 1); + p2 = VEC_index (tree, params, 2); + + /* Create pointer to appropriate size. 
*/ + I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1); + I_type_ptr = build_pointer_type (I_type); + + /* Convert object pointer to required type. */ + p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0); + VEC_replace (tree, params, 0, p0); + + /* Move memory model to the 2nd position, and end param list. */ + VEC_replace (tree, params, 1, p2); + VEC_truncate (tree, params, 2); + + /* Convert return pointer and dereference it for later assignment. */ + *new_return = build_indirect_ref (loc, p1, RO_UNARY_STAR); + + return false; +} + + +/* This will process an __atomic_store function call, determine whether it + needs to be mapped to the _N variation, or turned into a library call. + LOC is the location of the builtin call. + FUNCTION is the DECL that has been invoked; + PARAMS is the argument list for the call. The return value is non-null + TRUE is returned if it is translated into the proper format for a call to the + external library, and NEW_RETURN is set the tree for that function. + FALSE is returned if processing for the _N variation is required, and + NEW_RETURN is set to the the return value the result is copied into. */ + +static bool +resolve_overloaded_atomic_store (location_t loc, tree function, + VEC(tree,gc) *params, tree *new_return) +{ + tree p0, p1; + tree I_type, I_type_ptr; + int n = get_atomic_generic_size (loc, function, params); + + /* If not a lock-free size, change to the library generic format. */ + if (n != 1 && n != 2 && n != 4 && n != 8 && n != 16) + { + *new_return = add_atomic_size_parameter (n, loc, function, params); + return true; + } + + /* Otherwise, there is a match, so the call needs to be transformed from: + void fn(T* mem, T* value, model) + into + fn ((In *) mem, (In) *value, model) */ + + p0 = VEC_index (tree, params, 0); + p1 = VEC_index (tree, params, 1); + + /* Create pointer to appropriate size. */ + I_type = builtin_type_for_size (BITS_PER_UNIT * n, 1); + I_type_ptr = build_pointer_type (I_type); + + /* Convert object pointer to required type. */ + p0 = build1 (VIEW_CONVERT_EXPR, I_type_ptr, p0); + VEC_replace (tree, params, 0, p0); + + /* Convert new value to required type, and dereference it. */ + p1 = build_indirect_ref (loc, p1, RO_UNARY_STAR); + p1 = build1 (VIEW_CONVERT_EXPR, I_type, p1); + VEC_replace (tree, params, 1, p1); + + /* The memory model is in the right spot already. Return is void. */ + *new_return = NULL_TREE; + + return false; +} + + /* Some builtin functions are placeholders for other expressions. This function should be called immediately after parsing the call expression before surrounding code has committed to the type of the expression. @@ -9086,6 +9475,9 @@ tree resolve_overloaded_builtin (location_t loc, tree function, VEC(tree,gc) *params) { enum built_in_function orig_code = DECL_FUNCTION_CODE (function); + bool orig_format = true; + tree new_return = NULL_TREE; + switch (DECL_BUILT_IN_CLASS (function)) { case BUILT_IN_NORMAL: @@ -9102,6 +9494,78 @@ resolve_overloaded_builtin (location_t loc, tree function, VEC(tree,gc) *params) /* Handle BUILT_IN_NORMAL here. */ switch (orig_code) { + case BUILT_IN_ATOMIC_EXCHANGE: + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE: + case BUILT_IN_ATOMIC_LOAD: + case BUILT_IN_ATOMIC_STORE: + { + /* Handle these 4 together so that they can fall through to the next + case if the call is transformed to an _N variant. 
*/ + switch (orig_code) + { + case BUILT_IN_ATOMIC_EXCHANGE: + { + if (resolve_overloaded_atomic_exchange (loc, function, params, + &new_return)) + return new_return; + /* Change to the _N variant. */ + orig_code = BUILT_IN_ATOMIC_EXCHANGE_N; + break; + } + + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE: + { + if (resolve_overloaded_atomic_compare_exchange (loc, function, + params, + &new_return)) + return new_return; + /* Change to the _N variant. */ + orig_code = BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N; + break; + } + case BUILT_IN_ATOMIC_LOAD: + { + if (resolve_overloaded_atomic_load (loc, function, params, + &new_return)) + return new_return; + /* Change to the _N variant. */ + orig_code = BUILT_IN_ATOMIC_LOAD_N; + break; + } + case BUILT_IN_ATOMIC_STORE: + { + if (resolve_overloaded_atomic_store (loc, function, params, + &new_return)) + return new_return; + /* Change to the _N variant. */ + orig_code = BUILT_IN_ATOMIC_STORE_N; + break; + } + default: + gcc_unreachable (); + } + /* Fallthrough to the normal processing. */ + } + case BUILT_IN_ATOMIC_EXCHANGE_N: + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N: + case BUILT_IN_ATOMIC_LOAD_N: + case BUILT_IN_ATOMIC_STORE_N: + case BUILT_IN_ATOMIC_ADD_FETCH_N: + case BUILT_IN_ATOMIC_SUB_FETCH_N: + case BUILT_IN_ATOMIC_AND_FETCH_N: + case BUILT_IN_ATOMIC_NAND_FETCH_N: + case BUILT_IN_ATOMIC_XOR_FETCH_N: + case BUILT_IN_ATOMIC_OR_FETCH_N: + case BUILT_IN_ATOMIC_FETCH_ADD_N: + case BUILT_IN_ATOMIC_FETCH_SUB_N: + case BUILT_IN_ATOMIC_FETCH_AND_N: + case BUILT_IN_ATOMIC_FETCH_NAND_N: + case BUILT_IN_ATOMIC_FETCH_XOR_N: + case BUILT_IN_ATOMIC_FETCH_OR_N: + { + orig_format = false; + /* Fallthru for parameter processing. */ + } case BUILT_IN_SYNC_FETCH_AND_ADD_N: case BUILT_IN_SYNC_FETCH_AND_SUB_N: case BUILT_IN_SYNC_FETCH_AND_OR_N: @@ -9128,15 +9592,31 @@ resolve_overloaded_builtin (location_t loc, tree function, VEC(tree,gc) *params) fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1); new_function = builtin_decl_explicit (fncode); - if (!sync_resolve_params (function, new_function, params)) + if (!sync_resolve_params (loc, function, new_function, params, + orig_format)) return error_mark_node; first_param = VEC_index (tree, params, 0); result = build_function_call_vec (loc, new_function, params, NULL); + if (result == error_mark_node) + return result; if (orig_code != BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N - && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N) - result = sync_resolve_return (first_param, result); + && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N + && orig_code != BUILT_IN_ATOMIC_STORE_N) + result = sync_resolve_return (first_param, result, orig_format); + /* If new_return is set, assign function to that expr and cast the + result to void since the generic interface returned void. */ + if (new_return) + { + /* Cast function result from I{1,2,4,8,16} to the required type. */ + result = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (new_return), result); + result = build2 (MODIFY_EXPR, TREE_TYPE (new_return), new_return, + result); + TREE_SIDE_EFFECTS (result) = 1; + protected_set_expr_location (result, loc); + result = convert (void_type_node, result); + } return result; } diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c index bb9893a..bf83c26 100644 --- a/gcc/c-family/c-cppbuiltin.c +++ b/gcc/c-family/c-cppbuiltin.c @@ -758,30 +758,50 @@ c_cpp_builtins (cpp_reader *pfile) /* Tell source code if the compiler makes sync_compare_and_swap builtins available. 
*/ -#ifdef HAVE_sync_compare_and_swapqi - if (HAVE_sync_compare_and_swapqi) - cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); +#ifndef HAVE_sync_compare_and_swapqi +#define HAVE_sync_compare_and_swapqi 0 +#endif +#ifndef HAVE_atomic_compare_and_swapqi +#define HAVE_atomic_compare_and_swapqi 0 #endif + if (HAVE_sync_compare_and_swapqi || HAVE_atomic_compare_and_swapqi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); -#ifdef HAVE_sync_compare_and_swaphi - if (HAVE_sync_compare_and_swaphi) - cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); +#ifndef HAVE_sync_compare_and_swaphi +#define HAVE_sync_compare_and_swaphi 0 #endif +#ifndef HAVE_atomic_compare_and_swaphi +#define HAVE_atomic_compare_and_swaphi 0 +#endif + if (HAVE_sync_compare_and_swaphi || HAVE_atomic_compare_and_swaphi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); -#ifdef HAVE_sync_compare_and_swapsi - if (HAVE_sync_compare_and_swapsi) - cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); +#ifndef HAVE_sync_compare_and_swapsi +#define HAVE_sync_compare_and_swapsi 0 +#endif +#ifndef HAVE_atomic_compare_and_swapsi +#define HAVE_atomic_compare_and_swapsi 0 #endif + if (HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); -#ifdef HAVE_sync_compare_and_swapdi - if (HAVE_sync_compare_and_swapdi) - cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); +#ifndef HAVE_sync_compare_and_swapdi +#define HAVE_sync_compare_and_swapdi 0 #endif +#ifndef HAVE_atomic_compare_and_swapdi +#define HAVE_atomic_compare_and_swapdi 0 +#endif + if (HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); -#ifdef HAVE_sync_compare_and_swapti - if (HAVE_sync_compare_and_swapti) - cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); +#ifndef HAVE_sync_compare_and_swapti +#define HAVE_sync_compare_and_swapti 0 +#endif +#ifndef HAVE_atomic_compare_and_swapti +#define HAVE_atomic_compare_and_swapti 0 #endif + if (HAVE_sync_compare_and_swapti || HAVE_atomic_compare_and_swapti) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); #ifdef DWARF2_UNWIND_INFO if (dwarf2out_do_cfi_asm ()) diff --git a/gcc/c-typeck.c b/gcc/c-typeck.c index 46363c0..392ac65 100644 --- a/gcc/c-typeck.c +++ b/gcc/c-typeck.c @@ -2717,6 +2717,10 @@ build_function_call_vec (location_t loc, tree function, VEC(tree,gc) *params, name = DECL_NAME (function); fundecl = function; + /* Atomic functions have type checking/casting already done. They are + often rewritten and don't match the original parameter list. */ + if (name && !strncmp (IDENTIFIER_POINTER (name), "__atomic_", 9)) + origtypes = NULL; } if (TREE_CODE (TREE_TYPE (function)) == FUNCTION_TYPE) function = function_to_pointer_conversion (loc, function); diff --git a/gcc/common.opt b/gcc/common.opt index 13bbf9e..1871054 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -551,6 +551,10 @@ Winline Common Var(warn_inline) Warning Warn when an inlined function cannot be inlined +Winvalid-memory-model +Common Var(warn_invalid_memory_model) Init(1) Warning +Warn when an atomic memory model parameter is known to be outside the valid range. 
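A minimal sketch of what this new warning catches, assuming the six __ATOMIC_* constants (values 0 through 5) introduced elsewhere in this patch; the variable and function names are illustrative only:

    int x;

    int bad_load (void)
    {
      /* 42 is outside the valid memory model range 0..5, so
         -Winvalid-memory-model is expected to diagnose this call.  */
      return __atomic_load_n (&x, 42);
    }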
+ Wlarger-than- Common RejectNegative Joined Warning Undocumented Alias(Wlarger-than=) @@ -1266,6 +1270,10 @@ finline-limit= Common RejectNegative Joined UInteger -finline-limit=<number> Limit the size of inlined functions to <number> +finline-atomics +Common Report Var(flag_inline_atomics) Init(1) Optimization +Inline __atomic operations when a lock free instruction sequence is available. + finstrument-functions Common Report Var(flag_instrument_function_entry_exit) Instrument function entry and exit with profiling calls diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index dc2bf28..39c4cd7 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -248,6 +248,9 @@ ;; For BMI2 support UNSPEC_PDEP UNSPEC_PEXT + + ;; For __atomic support + UNSPEC_MOVA ]) (define_c_enum "unspecv" [ @@ -262,7 +265,10 @@ UNSPECV_ALIGN UNSPECV_MONITOR UNSPECV_MWAIT - UNSPECV_CMPXCHG + UNSPECV_CMPXCHG_1 + UNSPECV_CMPXCHG_2 + UNSPECV_CMPXCHG_3 + UNSPECV_CMPXCHG_4 UNSPECV_XCHG UNSPECV_LOCK UNSPECV_PROLOGUE_USE diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index 20378d0..0ff1712 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -18,31 +18,27 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. -(define_mode_iterator CASMODE - [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B") - (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) -(define_mode_iterator DCASMODE - [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic") - (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) -(define_mode_attr doublemodesuffix [(DI "8") (TI "16")]) -(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")]) - -(define_expand "memory_barrier" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] +(define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model "" { - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; + /* Unless this is a SEQ_CST fence, the i386 memory model is strong + enough not to require barriers of any kind. */ + if (INTVAL (operands[0]) != MEMMODEL_SEQ_CST) + DONE; - if (!(TARGET_64BIT || TARGET_SSE2)) + if (TARGET_64BIT || TARGET_SSE2) + emit_insn (gen_sse2_mfence ()); + else { - emit_insn (gen_memory_barrier_nosse (operands[0])); - DONE; + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_mfence_nosse (mem)); } + DONE; }) -(define_insn "memory_barrier_nosse" +(define_insn "mfence_nosse" [(set (match_operand:BLK 0 "" "") (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE)) (clobber (reg:CC FLAGS_REG))] @@ -50,127 +46,315 @@ "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" [(set_attr "memory" "unknown")]) -;; ??? It would be possible to use cmpxchg8b on pentium for DImode -;; changes. It's complicated because the insn uses ecx:ebx as the -;; new value; note that the registers are reversed from the order -;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode -;; data in 64-bit mode. - -(define_expand "sync_compare_and_swap<mode>" - [(parallel - [(set (match_operand:CASMODE 0 "register_operand" "") - (match_operand:CASMODE 1 "memory_operand" "")) - (set (match_dup 1) - (unspec_volatile:CASMODE - [(match_dup 1) - (match_operand:CASMODE 2 "register_operand" "") - (match_operand:CASMODE 3 "register_operand" "")] - UNSPECV_CMPXCHG)) - (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:CASMODE - [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG) - (match_dup 2)))])] - "TARGET_CMPXCHG" +;; ??? 
From volume 3 section 7.1.1 Guaranteed Atomic Operations, +;; Only beginning at Pentium family processors do we get any guarantee of +;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a +;; guarantee for 64-bit accesses that do not cross a cacheline boundary. +;; +;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium". +;; +;; Importantly, *no* processor makes atomicity guarantees for larger +;; accesses. In particular, there's no way to perform an atomic TImode +;; move, despite the apparent applicability of MOVDQA et al. + +(define_mode_iterator ATOMIC + [QI HI SI + (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))") + ]) + +(define_expand "atomic_load<mode>" + [(set (match_operand:ATOMIC 0 "register_operand" "") + (unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + UNSPEC_MOVA))] + "" +{ + /* For DImode on 32-bit, we can use the FPU to perform the load. */ + if (<MODE>mode == DImode && !TARGET_64BIT) + emit_insn (gen_atomic_loaddi_fpu + (operands[0], operands[1], + assign_386_stack_local (DImode, + (virtuals_instantiated + ? SLOT_TEMP : SLOT_VIRTUAL)))); + else + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_insn_and_split "atomic_loaddi_fpu" + [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")] + UNSPEC_MOVA)) + (clobber (match_operand:DI 2 "memory_operand" "=X,X,m")) + (clobber (match_scratch:DF 3 "=X,xf,xf"))] + "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dst = operands[0], src = operands[1]; + rtx mem = operands[2], tmp = operands[3]; + + if (SSE_REG_P (dst)) + emit_move_insn (dst, src); + else + { + if (MEM_P (dst)) + mem = dst; + + if (FP_REG_P (tmp)) + emit_insn (gen_movdi_via_fpu (mem, src, tmp)); + else + { + adjust_reg_mode (tmp, DImode); + emit_move_insn (tmp, src); + emit_move_insn (mem, tmp); + } + + if (mem != dst) + emit_move_insn (dst, mem); + } + DONE; +}) + +(define_expand "atomic_store<mode>" + [(set (match_operand:ATOMIC 0 "memory_operand" "") + (unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + UNSPEC_MOVA))] + "" +{ + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + + if (<MODE>mode == DImode && !TARGET_64BIT) + { + /* For DImode on 32-bit, we can use the FPU to perform the store. */ + /* Note that while we could perform a cmpxchg8b loop, that turns + out to be significantly larger than this plus a barrier. */ + emit_insn (gen_atomic_storedi_fpu + (operands[0], operands[1], + assign_386_stack_local (DImode, + (virtuals_instantiated + ? SLOT_TEMP : SLOT_VIRTUAL)))); + } + else + { + /* For seq-cst stores, when we lack MFENCE, use XCHG. */ + if (model == MEMMODEL_SEQ_CST && !(TARGET_64BIT || TARGET_SSE2)) + { + emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode), + operands[0], operands[1], + operands[2])); + DONE; + } + + /* Otherwise use a normal store. */ + emit_move_insn (operands[0], operands[1]); + } + /* ... followed by an MFENCE, if required. 
*/ + if (model == MEMMODEL_SEQ_CST) + emit_insn (gen_mem_thread_fence (operands[2])); + DONE; +}) + +(define_insn_and_split "atomic_storedi_fpu" + [(set (match_operand:DI 0 "memory_operand" "=m,m,m") + (unspec:DI [(match_operand:DI 1 "register_operand" "x,m,?r")] + UNSPEC_MOVA)) + (clobber (match_operand:DI 2 "memory_operand" "=X,X,m")) + (clobber (match_scratch:DF 3 "=X,xf,xf"))] + "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)" + "#" + "&& reload_completed" + [(const_int 0)] { - if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode) + rtx dst = operands[0], src = operands[1]; + rtx mem = operands[2], tmp = operands[3]; + + if (!SSE_REG_P (src)) { - enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode; - rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0); - rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode, - GET_MODE_SIZE (hmode)); - low = force_reg (hmode, low); - high = force_reg (hmode, high); - if (<MODE>mode == DImode) + if (REG_P (src)) + { + emit_move_insn (mem, src); + src = mem; + } + + if (FP_REG_P (tmp)) { - if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode)) - operands[1] = replace_equiv_address (operands[1], - force_reg (Pmode, - XEXP (operands[1], - 0))); - emit_insn (gen_sync_double_compare_and_swapdi - (operands[0], operands[1], operands[2], low, high)); + emit_insn (gen_movdi_via_fpu (dst, src, tmp)); + DONE; } - else if (<MODE>mode == TImode) - emit_insn (gen_sync_double_compare_and_swapti - (operands[0], operands[1], operands[2], low, high)); else - gcc_unreachable (); - DONE; + { + adjust_reg_mode (tmp, DImode); + emit_move_insn (tmp, mem); + src = tmp; + } } + emit_move_insn (dst, src); + DONE; +}) + +;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC +;; operations. But the fix_trunc patterns want way more setup than we want +;; to provide. Note that the scratch is DFmode instead of XFmode in order +;; to make it easy to allocate a scratch in either SSE or FP_REGs above. 
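At the source level, the case these DImode expanders serve is a plain 64-bit atomic access in 32-bit code; a rough sketch, assuming an -m32 target with cmpxchg8b and an x87 or SSE unit (names are illustrative):

    long long counter;

    long long read_counter (void)
    {
      /* Expected to expand through atomic_loaddi_fpu rather than a
         cmpxchg8b loop.  */
      return __atomic_load_n (&counter, __ATOMIC_ACQUIRE);
    }

When the x87 path is chosen, the actual 64-bit move is the fild/fistp pair supplied by the movdi_via_fpu pattern that follows.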
+(define_insn "movdi_via_fpu" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_MOVA)) + (clobber (match_operand:DF 2 "register_operand" "=f"))] + "TARGET_80387" + "fild\t%1\;fistp\t%0" + [(set_attr "type" "multi") + ;; Worst case based on full sib+offset32 addressing modes + (set_attr "length" "14")]) + +(define_expand "atomic_compare_and_swap<mode>" + [(match_operand:QI 0 "register_operand" "") ;; bool success output + (match_operand:SWI124 1 "register_operand" "") ;; oldval output + (match_operand:SWI124 2 "memory_operand" "") ;; memory + (match_operand:SWI124 3 "register_operand" "") ;; expected input + (match_operand:SWI124 4 "register_operand" "") ;; newval input + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; success model + (match_operand:SI 7 "const_int_operand" "")] ;; failure model + "TARGET_CMPXCHG" +{ + emit_insn (gen_atomic_compare_and_swap_single<mode> + (operands[1], operands[2], operands[3], operands[4])); + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + DONE; }) -(define_insn "*sync_compare_and_swap<mode>" +(define_mode_iterator CASMODE + [(DI "TARGET_64BIT || TARGET_CMPXCHG8B") + (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) +(define_mode_iterator DCASMODE + [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic") + (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) +(define_mode_attr doublemodesuffix [(DI "8") (TI "16")]) +(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")]) + +(define_expand "atomic_compare_and_swap<mode>" + [(match_operand:QI 0 "register_operand" "") ;; bool success output + (match_operand:CASMODE 1 "register_operand" "") ;; oldval output + (match_operand:CASMODE 2 "memory_operand" "") ;; memory + (match_operand:CASMODE 3 "register_operand" "") ;; expected input + (match_operand:CASMODE 4 "register_operand" "") ;; newval input + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; success model + (match_operand:SI 7 "const_int_operand" "")] ;; failure model + "TARGET_CMPXCHG" +{ + if (<MODE>mode == DImode && TARGET_64BIT) + { + emit_insn (gen_atomic_compare_and_swap_singledi + (operands[1], operands[2], operands[3], operands[4])); + } + else + { + enum machine_mode hmode = <DCASHMODE>mode; + rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem; + + lo_o = operands[1]; + mem = operands[2]; + lo_e = operands[3]; + lo_n = operands[4]; + hi_o = gen_highpart (hmode, lo_o); + hi_e = gen_highpart (hmode, lo_e); + hi_n = gen_highpart (hmode, lo_n); + lo_o = gen_lowpart (hmode, lo_o); + lo_e = gen_lowpart (hmode, lo_e); + lo_n = gen_lowpart (hmode, lo_n); + + if (<MODE>mode == DImode + && !TARGET_64BIT + && flag_pic + && !cmpxchg8b_pic_memory_operand (mem, DImode)) + mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); + + emit_insn (gen_atomic_compare_and_swap_double<mode> + (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n)); + } + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + DONE; +}) + +(define_insn "atomic_compare_and_swap_single<mode>" [(set (match_operand:SWI 0 "register_operand" "=a") - (match_operand:SWI 1 "memory_operand" "+m")) - (set (match_dup 1) (unspec_volatile:SWI - [(match_dup 1) - (match_operand:SWI 2 "register_operand" "a") + [(match_operand:SWI 1 "memory_operand" "+m") + (match_operand:SWI 2 "register_operand" "0") (match_operand:SWI 3 "register_operand" "<r>")] - UNSPECV_CMPXCHG)) + UNSPECV_CMPXCHG_1)) + 
(set (match_dup 1) + (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:SWI - [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))] "TARGET_CMPXCHG" "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}") -(define_insn "sync_double_compare_and_swap<mode>" - [(set (match_operand:DCASMODE 0 "register_operand" "=A") - (match_operand:DCASMODE 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DCASMODE - [(match_dup 1) - (match_operand:DCASMODE 2 "register_operand" "A") - (match_operand:<DCASHMODE> 3 "register_operand" "b") - (match_operand:<DCASHMODE> 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) +;; For double-word compare and swap, we are obliged to play tricks with +;; the input newval (op5:op6) because the Intel register numbering does +;; not match the gcc register numbering, so the pair must be CX:BX. +;; That said, in order to take advantage of possible lower-subreg opts, +;; treat all of the integral operands in the same way. +(define_insn "atomic_compare_and_swap_double<mode>" + [(set (match_operand:<DCASHMODE> 0 "register_operand" "=a") + (unspec_volatile:<DCASHMODE> + [(match_operand:DCASMODE 2 "memory_operand" "+m") + (match_operand:<DCASHMODE> 3 "register_operand" "0") + (match_operand:<DCASHMODE> 4 "register_operand" "1") + (match_operand:<DCASHMODE> 5 "register_operand" "b") + (match_operand:<DCASHMODE> 6 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (match_operand:<DCASHMODE> 1 "register_operand" "=d") + (unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2)) + (set (match_dup 2) + (unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:DCASMODE - [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] - UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))] "" - "lock{%;} cmpxchg<doublemodesuffix>b\t%1") - -;; Theoretically we'd like to use constraint "r" (any reg) for operand -;; 3, but that includes ecx. If operand 3 and 4 are the same (like when -;; the input is -1LL) GCC might chose to allocate operand 3 to ecx, like -;; operand 4. This breaks, as the xchg will move the PIC register contents -;; to %ecx then --> boom. Operands 3 and 4 really need to be different -;; registers, which in this case means operand 3 must not be ecx. -;; Instead of playing tricks with fake early clobbers or the like we -;; just enumerate all regs possible here, which (as this is !TARGET_64BIT) + "lock{%;} cmpxchg<doublemodesuffix>b\t%2") + +;; Theoretically we'd like to use constraint "r" (any reg) for op5, +;; but that includes ecx. If op5 and op6 are the same (like when +;; the input is -1LL) GCC might chose to allocate op5 to ecx, like +;; op6. This breaks, as the xchg will move the PIC register contents +;; to %ecx then --> boom. Operands 5 and 6 really need to be different +;; registers, which in this case means op5 must not be ecx. Instead +;; of playing tricks with fake early clobbers or the like we just +;; enumerate all regs possible here, which (as this is !TARGET_64BIT) ;; are just esi and edi. 
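The constraint games above and the PIC variant below exist for ordinary 64-bit compare-and-swap in 32-bit position-independent code; a sketch of the kind of source involved, assuming -m32 -fPIC on a cmpxchg8b-capable target (names are illustrative):

    long long shared;

    _Bool try_update (long long *expected, long long newval)
    {
      /* cmpxchg8b wants the new value in ecx:ebx, but ebx doubles as the
         PIC register, hence the xchg %ebx shuffle in the pattern below.  */
      return __atomic_compare_exchange_n (&shared, expected, newval, 0,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }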
-(define_insn "*sync_double_compare_and_swapdi_pic" - [(set (match_operand:DI 0 "register_operand" "=A") - (match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DI - [(match_dup 1) - (match_operand:DI 2 "register_operand" "A") - (match_operand:SI 3 "register_operand" "SD") - (match_operand:SI 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) +(define_insn "*atomic_compare_and_swap_doubledi_pic" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI + [(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m") + (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 4 "register_operand" "1") + (match_operand:SI 5 "register_operand" "SD") + (match_operand:SI 6 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (match_operand:SI 1 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2)) + (set (match_dup 2) + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:DI - [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] - UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))] "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic" - "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3") + "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5") ;; For operand 2 nonmemory_operand predicate is used instead of ;; register_operand to allow combiner to better optimize atomic ;; additions of constants. -(define_insn "sync_old_add<mode>" +(define_insn "atomic_fetch_add<mode>" [(set (match_operand:SWI 0 "register_operand" "=<r>") (unspec_volatile:SWI - [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG)) + [(match_operand:SWI 1 "memory_operand" "+m") + (match_operand:SI 3 "const_int_operand" "")] ;; model + UNSPECV_XCHG)) (set (match_dup 1) (plus:SWI (match_dup 1) (match_operand:SWI 2 "nonmemory_operand" "0"))) @@ -186,7 +370,9 @@ (match_operand:SWI 2 "const_int_operand" "")) (parallel [(set (match_dup 0) (unspec_volatile:SWI - [(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG)) + [(match_operand:SWI 1 "memory_operand" "") + (match_operand:SI 4 "const_int_operand" "")] + UNSPECV_XCHG)) (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0))) @@ -199,17 +385,19 @@ == -(unsigned HOST_WIDE_INT) INTVAL (operands[3]) && !reg_overlap_mentioned_p (operands[0], operands[1])" [(parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (unspec_volatile:SWI [(match_dup 1)] - UNSPECV_XCHG) - (match_dup 3))) + (compare:CCZ + (unspec_volatile:SWI [(match_dup 1) (match_dup 4)] + UNSPECV_XCHG) + (match_dup 3))) (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 2)))])]) -(define_insn "*sync_old_add_cmp<mode>" +(define_insn "*atomic_fetch_add_cmp<mode>" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (unspec_volatile:SWI - [(match_operand:SWI 0 "memory_operand" "+m")] + [(match_operand:SWI 0 "memory_operand" "+m") + (match_operand:SI 3 "const_int_operand" "")] UNSPECV_XCHG) (match_operand:SWI 2 "const_int_operand" "i"))) (set (match_dup 0) @@ -233,20 +421,24 @@ }) ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space. -(define_insn "sync_lock_test_and_set<mode>" - [(set (match_operand:SWI 0 "register_operand" "=<r>") +;; In addition, it is always a full barrier, so we can ignore the memory model. 
+(define_insn "atomic_exchange<mode>" + [(set (match_operand:SWI 0 "register_operand" "=<r>") ;; output (unspec_volatile:SWI - [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG)) + [(match_operand:SWI 1 "memory_operand" "+m") ;; memory + (match_operand:SI 3 "const_int_operand" "")] ;; model + UNSPECV_XCHG)) (set (match_dup 1) - (match_operand:SWI 2 "register_operand" "0"))] + (match_operand:SWI 2 "register_operand" "0"))] ;; input "" "xchg{<imodesuffix>}\t{%1, %0|%0, %1}") -(define_insn "sync_add<mode>" +(define_insn "atomic_add<mode>" [(set (match_operand:SWI 0 "memory_operand" "+m") (unspec_volatile:SWI [(plus:SWI (match_dup 0) - (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))] + (match_operand:SWI 1 "nonmemory_operand" "<r><i>")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" @@ -265,11 +457,12 @@ return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}"; }) -(define_insn "sync_sub<mode>" +(define_insn "atomic_sub<mode>" [(set (match_operand:SWI 0 "memory_operand" "+m") (unspec_volatile:SWI [(minus:SWI (match_dup 0) - (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))] + (match_operand:SWI 1 "nonmemory_operand" "<r><i>")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" @@ -282,14 +475,18 @@ return "lock{%;} inc{<imodesuffix>}\t%0"; } + if (x86_maybe_negate_const_int (&operands[1], <MODE>mode)) + return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}"; + return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}"; }) -(define_insn "sync_<code><mode>" +(define_insn "atomic_<code><mode>" [(set (match_operand:SWI 0 "memory_operand" "+m") (unspec_volatile:SWI [(any_logic:SWI (match_dup 0) - (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))] + (match_operand:SWI 1 "nonmemory_operand" "<r><i>")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" diff --git a/gcc/coretypes.h b/gcc/coretypes.h index 45cdbbd..1374a98 100644 --- a/gcc/coretypes.h +++ b/gcc/coretypes.h @@ -181,5 +181,18 @@ union _dont_use_tree_here_; #endif +/* Memory model types for the __atomic* builtins. + This must match the order in libstdc++-v3/include/bits/atomic_base.h. */ +enum memmodel +{ + MEMMODEL_RELAXED = 0, + MEMMODEL_CONSUME = 1, + MEMMODEL_ACQUIRE = 2, + MEMMODEL_RELEASE = 3, + MEMMODEL_ACQ_REL = 4, + MEMMODEL_SEQ_CST = 5, + MEMMODEL_LAST = 6 +}; + #endif /* coretypes.h */ diff --git a/gcc/cppbuiltin.c b/gcc/cppbuiltin.c index cf7d2ff..05d82f5 100644 --- a/gcc/cppbuiltin.c +++ b/gcc/cppbuiltin.c @@ -66,6 +66,12 @@ define__GNUC__ (cpp_reader *pfile) cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); + cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); + cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); + cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); + cpp_define_formatted (pfile, "__ATOMIC_RELEASE=%d", MEMMODEL_RELEASE); + cpp_define_formatted (pfile, "__ATOMIC_ACQ_REL=%d", MEMMODEL_ACQ_REL); + cpp_define_formatted (pfile, "__ATOMIC_CONSUME=%d", MEMMODEL_CONSUME); } diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index c605462..91e4e32 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -79,7 +79,8 @@ extensions, accepted by GCC in C90 mode and in C++. * Return Address:: Getting the return or frame address of a function. 
* Vector Extensions:: Using vector instructions through built-in functions. * Offsetof:: Special syntax for implementing @code{offsetof}. -* Atomic Builtins:: Built-in functions for atomic memory access. +* __sync Builtins:: Legacy built-in functions for atomic memory access. +* __atomic Builtins:: Atomic built-in functions with memory model. * Object Size Checking:: Built-in functions for limited buffer overflow checking. * Other Builtins:: Other built-in functions. @@ -6683,8 +6684,8 @@ is a suitable definition of the @code{offsetof} macro. In C++, @var{type} may be dependent. In either case, @var{member} may consist of a single identifier, or a sequence of member accesses and array references. -@node Atomic Builtins -@section Built-in functions for atomic memory access +@node __sync Builtins +@section Legacy __sync built-in functions for atomic memory access The following builtins are intended to be compatible with those described in the @cite{Intel Itanium Processor-specific Application Binary Interface}, @@ -6816,6 +6817,238 @@ previous memory loads have been satisfied, but following memory reads are not prevented from being speculated to before the barrier. @end table +@node __atomic Builtins +@section Built-in functions for memory model aware atomic operations + +The following built-in functions approximately match the requirements for +C++11 memory model. Many are similar to the @samp{__sync} prefixed built-in +functions, but all also have a memory model parameter. These are all +identified by being prefixed with @samp{__atomic}, and most are overloaded +such that they work with multiple types. + +GCC will allow any integral scalar or pointer type that is 1, 2, 4, or 8 +bytes in length. 16-byte integral types are also allowed if +@samp{__int128} (@pxref{__int128}) is supported by the architecture. + +Target architectures are encouraged to provide their own patterns for +each of these built-in functions. If no target is provided, the original +non-memory model set of @samp{__sync} atomic built-in functions will be +utilized, along with any required synchronization fences surrounding it in +order to achieve the proper behaviour. Execution in this case is subject +to the same restrictions as those built-in functions. + +If there is no pattern or mechanism to provide a lock free instruction +sequence, a call is made to an external routine with the same parameters +to be resolved at runtime. + +The four non-arithmetic functions (load, store, exchange, and +compare_exchange) all have a generic version as well. This generic +version will work on any data type. If the data type size maps to one +of the integral sizes which may have lock free support, the generic +version will utilize the lock free built-in function. Otherwise an +external call is left to be resolved at runtime. This external call will +be the same format with the addition of a @samp{size_t} parameter inserted +as the first parameter indicating the size of the object being pointed to. +All objects must be the same size. + +There are 6 different memory models which can be specified. These map +to the same names in the C++11 standard. Refer there or to the +@uref{http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync,GCC wiki on +atomic synchronization} for more detailed definitions. These memory +models integrate both barriers to code motion as well as synchronization +requirements with other threads. These are listed in approximately +ascending order of strength. 
+ +@table @code +@item __ATOMIC_RELAXED +No barriers or synchronization. +@item __ATOMIC_CONSUME +Data dependency only for both barrier and synchronization with another +thread. +@item __ATOMIC_ACQUIRE +Barrier to hoisting of code and synchronizes with release (or stronger) +semantic stores from another thread. +@item __ATOMIC_RELEASE +Barrier to sinking of code and synchronizes with acquire (or stronger) +semantic loads from another thread. +@item __ATOMIC_ACQ_REL +Full barrier in both directions and synchronizes with acquire loads and +release stores in another thread. +@item __ATOMIC_SEQ_CST +Full barrier in both directions and synchronizes with acquire loads and +release stores in all threads. +@end table + +When implementing patterns for these built-in functions , the memory model +parameter can be ignored as long as the pattern implements the most +restrictive @code{__ATOMIC_SEQ_CST} model. Any of the other memory models +will execute correctly with this memory model but they may not execute as +efficiently as they could with a more appropriate implemention of the +relaxed requirements. + +Note that the C++11 standard allows for the memory model parameter to be +determined at runtime rather than at compile time. These built-in +functions will map any runtime value to @code{__ATOMIC_SEQ_CST} rather +than invoke a runtime library call or inline a switch statement. This is +standard compliant, safe, and the simplest approach for now. + +@deftypefn {Built-in Function} @var{type} __atomic_load_n (@var{type} *ptr, int memmodel) +This built-in function implements an atomic load operation. It returns the +contents of @code{*@var{ptr}}. + +The valid memory model variants are +@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE}, +and @code{__ATOMIC_CONSUME}. + +@end deftypefn + +@deftypefn {Built-in Function} void __atomic_load (@var{type} *ptr, @var{type} *ret, int memmodel) +This is the generic version of an atomic load. It will return the +contents of @code{*@var{ptr}} in @code{*@var{ret}}. + +@end deftypefn + +@deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr, @var{type} val, int memmodel) +This built-in function implements an atomic store operation. It writes +@code{@var{val}} into @code{*@var{ptr}}. On targets which are limited, +0 may be the only valid value. This mimics the behaviour of +@code{__sync_lock_release} on such hardware. + +The valid memory model variants are +@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}. + +@end deftypefn + +@deftypefn {Built-in Function} void __atomic_store (@var{type} *ptr, @var{type} *val, int memmodel) +This is the generic version of an atomic store. It will store the value +of @code{*@var{val}} into @code{*@var{ptr}}. + +@end deftypefn + +@deftypefn {Built-in Function} @var{type} __atomic_exchange_n (@var{type} *ptr, @var{type} val, int memmodel) +This built-in function implements an atomic exchange operation. It writes +@var{val} into @code{*@var{ptr}}, and returns the previous contents of +@code{*@var{ptr}}. + +On targets which are limited, a value of 1 may be the only valid value +written. This mimics the behaviour of @code{__sync_lock_test_and_set} on +such hardware. + +The valid memory model variants are +@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE}, +@code{__ATOMIC_RELEASE}, and @code{__ATOMIC_ACQ_REL}. 
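As a usage sketch only (the @code{lock} flag here is a hypothetical example, not part of the builtin interface), the acquire/release pair of models is the natural fit for a simple spinlock built on this builtin:

@smallexample
static int lock;

void
spin_lock (void)
@{
  while (__atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE))
    ;  /* spin while the previous value was 1 */
@}

void
spin_unlock (void)
@{
  __atomic_store_n (&lock, 0, __ATOMIC_RELEASE);
@}
@end smallexample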
+ +@end deftypefn + +@deftypefn {Built-in Function} void __atomic_exchange (@var{type} *ptr, @var{type} *val, @var{type} *ret, int memmodel) +This is the generic version of an atomic exchange. It will store the +contents of @code{*@var{val}} into @code{*@var{ptr}}. The original value +of @code{*@var{ptr}} will be copied into @code{*@var{ret}}. + +@end deftypefn + +@deftypefn {Built-in Function} bool __atomic_compare_exchange_n (@var{type} *ptr, @var{type} *expected, @var{type} desired, bool weak, int success_memmodel, int failure_memmodel) +This built-in function implements an atomic compare and exchange operation. +This compares the contents of @code{*@var{ptr}} with the contents of +@code{*@var{expected}} and if equal, writes @var{desired} into +@code{*@var{ptr}}. If they are not equal, the current contents of +@code{*@var{ptr}} is written into @code{*@var{expected}}. + +True is returned if @code{*@var{desired}} is written into +@code{*@var{ptr}} and the execution is considered to conform to the +memory model specified by @var{success_memmodel}. There are no +restrictions on what memory model can be used here. + +False is returned otherwise, and the execution is considered to conform +to @var{failure_memmodel}. This memory model cannot be +@code{__ATOMIC_RELEASE} nor @code{__ATOMIC_ACQ_REL}. It also cannot be a +stronger model than that specified by @var{success_memmodel}. + +@end deftypefn + +@deftypefn {Built-in Function} bool __atomic_compare_exchange (@var{type} *ptr, @var{type} *expected, @var{type} *desired, bool weak, int success_memmodel, int failure_memmodel) +This built-in function implements the generic version of +@code{__atomic_compare_exchange}. The function is virtually identical to +@code{__atomic_compare_exchange_n}, except the desired value is also a +pointer. + +@end deftypefn + +@deftypefn {Built-in Function} @var{type} __atomic_add_fetch (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_sub_fetch (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_and_fetch (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_xor_fetch (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_or_fetch (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_nand_fetch (@var{type} *ptr, @var{type} val, int memmodel) +These built-in functions perform the operation suggested by the name, and +return the result of the operation. That is, + +@smallexample +@{ *ptr @var{op}= val; return *ptr; @} +@end smallexample + +All memory models are valid. + +@end deftypefn + +@deftypefn {Built-in Function} @var{type} __atomic_fetch_add (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_fetch_sub (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_fetch_and (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_fetch_xor (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_fetch_or (@var{type} *ptr, @var{type} val, int memmodel) +@deftypefnx {Built-in Function} @var{type} __atomic_fetch_nand (@var{type} *ptr, @var{type} val, int memmodel) +These built-in functions perform the operation suggested by the name, and +return the value that had previously been in @code{*@var{ptr}}. 
That is, + +@smallexample +@{ tmp = *ptr; *ptr @var{op}= val; return tmp; @} +@end smallexample + +All memory models are valid. + +@end deftypefn + +@deftypefn {Built-in Function} void __atomic_thread_fence (int memmodel) + +This built-in function acts as a synchronization fence between threads +based on the specified memory model. + +All memory orders are valid. + +@end deftypefn + +@deftypefn {Built-in Function} void __atomic_signal_fence (int memmodel) + +This built-in function acts as a synchronization fence between a thread +and signal handlers based in the same thread. + +All memory orders are valid. + +@end deftypefn + +@deftypefn {Built-in Function} bool __atomic_always_lock_free (size_t size) + +This built-in function returns true if objects of size bytes will always +generate lock free atomic instructions for the target architecture. +Otherwise false is returned. + +size must resolve to a compile time constant. + +@smallexample +if (_atomic_always_lock_free (sizeof (long long))) +@end smallexample + +@end deftypefn + +@deftypefn {Built-in Function} bool __atomic_is_lock_free (size_t size) + +This built-in function returns true if objects of size bytes will always +generate lock free atomic instructions for the target architecture. If +it is not known to be lock free a call is made to a runtime routine named +@code{__atomic_is_lock_free}. + +@end deftypefn + @node Object Size Checking @section Object Size Checking Builtins @findex __builtin_object_size diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 780d5c8..e3fb21b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -9163,11 +9163,26 @@ The maximum number of conditional stores paires that can be sunk. Set to 0 if either vectorization (@option{-ftree-vectorize}) or if-conversion (@option{-ftree-loop-if-convert}) is disabled. The default is 2. +@item allow-load-data-races +Allow optimizers to introduce new data races on loads. +Set to 1 to allow, otherwise to 0. This option is enabled by default +unless implicitly set by the @option{-fmemory-model=} option. + @item allow-store-data-races Allow optimizers to introduce new data races on stores. Set to 1 to allow, otherwise to 0. This option is enabled by default unless implicitly set by the @option{-fmemory-model=} option. +@item allow-packed-load-data-races +Allow optimizers to introduce new data races on packed data loads. +Set to 1 to allow, otherwise to 0. This option is enabled by default +unless implicitly set by the @option{-fmemory-model=} option. + +@item allow-packed-store-data-races +Allow optimizers to introduce new data races on packed data stores. +Set to 1 to allow, otherwise to 0. This option is enabled by default +unless implicitly set by the @option{-fmemory-model=} option. + @item case-values-threshold The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches. If the value is @@ -13180,7 +13195,8 @@ This option will enable GCC to use CMPXCHG16B instruction in generated code. CMPXCHG16B allows for atomic operations on 128-bit double quadword (or oword) data types. This is useful for high resolution counters that could be updated by multiple processors (or cores). This instruction is generated as part of -atomic built-in functions: see @ref{Atomic Builtins} for details. +atomic built-in functions: see @ref{__sync Builtins} or +@ref{__atomic Builtins} for details. 
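A sketch of the kind of source that benefits, assuming a 64-bit target where @code{__int128} is available (the function and variable names are illustrative):

@smallexample
__int128 slot;

_Bool
replace_slot (__int128 *expected, __int128 desired)
@{
  /* With -mcx16 this can inline to lock cmpxchg16b; without it, a call
     to an external routine is emitted to be resolved at run time.  */
  return __atomic_compare_exchange_n (&slot, expected, desired, 0,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
@}
@end smallexample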
@item -msahf @opindex msahf diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index a51e7cf..6b75f2b 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5699,6 +5699,155 @@ released only after all previous memory operations have completed. If this pattern is not defined, then a @code{memory_barrier} pattern will be emitted, followed by a store of the value to the memory operand. +@cindex @code{atomic_compare_and_swap@var{mode}} instruction pattern +@item @samp{atomic_compare_and_swap@var{mode}} +This pattern, if defined, emits code for an atomic compare-and-swap +operation with memory model semantics. Operand 2 is the memory on which +the atomic operation is performed. Operand 0 is an output operand which +is set to true or false based on whether the operation succeeded. Operand +1 is an output operand which is set to the contents of the memory before +the operation was attempted. Operand 3 is the value that is expected to +be in memory. Operand 4 is the value to put in memory if the expected +value is found there. Operand 5 is set to 1 if this compare and swap is to +be treated as a weak operation. Operand 6 is the memory model to be used +if the operation is a success. Operand 7 is the memory model to be used +if the operation fails. + +If memory referred to in operand 2 contains the value in operand 3, then +operand 4 is stored in memory pointed to by operand 2 and fencing based on +the memory model in operand 6 is issued. + +If memory referred to in operand 2 does not contain the value in operand 3, +then fencing based on the memory model in operand 7 is issued. + +If a target does not support weak compare-and-swap operations, or the port +elects not to implement weak operations, the argument in operand 5 can be +ignored. Note a strong implementation must be provided. + +If this pattern is not provided, the @code{__atomic_compare_exchange} +built-in functions will utilize the legacy @code{sync_compare_and_swap} +pattern with an @code{__ATOMIC_SEQ_CST} memory model. + +@cindex @code{atomic_load@var{mode}} instruction pattern +@item @samp{atomic_load@var{mode}} +This pattern implements an atomic load operation with memory model +semantics. Operand 1 is the memory address being loaded from. Operand 0 +is the result of the load. Operand 2 is the memory model to be used for +the load operation. + +If not present, the @code{__atomic_load} built-in function will either +resort to a normal load with memory barriers, or a compare-and-swap +operation if a normal load would not be atomic. + +@cindex @code{atomic_store@var{mode}} instruction pattern +@item @samp{atomic_store@var{mode}} +This pattern implements an atomic store operation with memory model +semantics. Operand 0 is the memory address being stored to. Operand 1 +is the value to be written. Operand 2 is the memory model to be used for +the operation. + +If not present, the @code{__atomic_store} built-in function will attempt to +perform a normal store and surround it with any required memory fences. If +the store would not be atomic, then an @code{__atomic_exchange} is +attempted with the result being ignored. + +@cindex @code{atomic_exchange@var{mode}} instruction pattern +@item @samp{atomic_exchange@var{mode}} +This pattern implements an atomic exchange operation with memory model +semantics. Operand 1 is the memory location the operation is performed on. +Operand 0 is an output operand which is set to the original value contained +in the memory pointed to by operand 1. Operand 2 is the value to be +stored. 
Operand 3 is the memory model to be used. + +If this pattern is not present, the built-in function +@code{__atomic_exchange} will attempt to preform the operation with a +compare and swap loop. + +@cindex @code{atomic_add@var{mode}} instruction pattern +@cindex @code{atomic_sub@var{mode}} instruction pattern +@cindex @code{atomic_or@var{mode}} instruction pattern +@cindex @code{atomic_and@var{mode}} instruction pattern +@cindex @code{atomic_xor@var{mode}} instruction pattern +@cindex @code{atomic_nand@var{mode}} instruction pattern +@item @samp{atomic_add@var{mode}}, @samp{atomic_sub@var{mode}} +@itemx @samp{atomic_or@var{mode}}, @samp{atomic_and@var{mode}} +@itemx @samp{atomic_xor@var{mode}}, @samp{atomic_nand@var{mode}} + +These patterns emit code for an atomic operation on memory with memory +model semantics. Operand 0 is the memory on which the atomic operation is +performed. Operand 1 is the second operand to the binary operator. +Operand 2 is the memory model to be used by the operation. + +If these patterns are not defined, attempts will be made to use legacy +@code{sync} patterns, or equivilent patterns which return a result. If +none of these are available a compare-and-swap loop will be used. + +@cindex @code{atomic_fetch_add@var{mode}} instruction pattern +@cindex @code{atomic_fetch_sub@var{mode}} instruction pattern +@cindex @code{atomic_fetch_or@var{mode}} instruction pattern +@cindex @code{atomic_fetch_and@var{mode}} instruction pattern +@cindex @code{atomic_fetch_xor@var{mode}} instruction pattern +@cindex @code{atomic_fetch_nand@var{mode}} instruction pattern +@item @samp{atomic_fetch_add@var{mode}}, @samp{atomic_fetch_sub@var{mode}} +@itemx @samp{atomic_fetch_or@var{mode}}, @samp{atomic_fetch_and@var{mode}} +@itemx @samp{atomic_fetch_xor@var{mode}}, @samp{atomic_fetch_nand@var{mode}} + +These patterns emit code for an atomic operation on memory with memory +model semantics, and return the original value. Operand 0 is an output +operand which contains the value of the memory location before the +operation was performed. Operand 1 is the memory on which the atomic +operation is performed. Operand 2 is the second operand to the binary +operator. Operand 3 is the memory model to be used by the operation. + +If these patterns are not defined, attempts will be made to use legacy +@code{sync} patterns. If none of these are available a compare-and-swap +loop will be used. + +@cindex @code{atomic_add_fetch@var{mode}} instruction pattern +@cindex @code{atomic_sub_fetch@var{mode}} instruction pattern +@cindex @code{atomic_or_fetch@var{mode}} instruction pattern +@cindex @code{atomic_and_fetch@var{mode}} instruction pattern +@cindex @code{atomic_xor_fetch@var{mode}} instruction pattern +@cindex @code{atomic_nand_fetch@var{mode}} instruction pattern +@item @samp{atomic_add_fetch@var{mode}}, @samp{atomic_sub_fetch@var{mode}} +@itemx @samp{atomic_or_fetch@var{mode}}, @samp{atomic_and_fetch@var{mode}} +@itemx @samp{atomic_xor_fetch@var{mode}}, @samp{atomic_nand_fetch@var{mode}} + +These patterns emit code for an atomic operation on memory with memory +model semantics and return the result after the operation is performed. +Operand 0 is an output operand which contains the value after the +operation. Operand 1 is the memory on which the atomic operation is +performed. Operand 2 is the second operand to the binary operator. +Operand 3 is the memory model to be used by the operation. 
+ +If these patterns are not defined, attempts will be made to use legacy +@code{sync} patterns, or equivilent patterns which return the result before +the operation followed by the arithmetic operation required to produce the +result. If none of these are available a compare-and-swap loop will be +used. + +@cindex @code{mem_thread_fence@var{mode}} instruction pattern +@item @samp{mem_thread_fence@var{mode}} +This pattern emits code required to implement a thread fence with +memory model semantics. Operand 0 is the memory model to be used. + +If this pattern is not specified, all memory models except +@code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize} +barrier pattern. + +@cindex @code{mem_signal_fence@var{mode}} instruction pattern +@item @samp{mem_signal_fence@var{mode}} +This pattern emits code required to implement a signal fence with +memory model semantics. Operand 0 is the memory model to be used. + +This pattern should impact the compiler optimizers the same way that +mem_signal_fence does, but it does not need to issue any barrier +instructions. + +If this pattern is not specified, all memory models except +@code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize} +barrier pattern. + @cindex @code{stack_protect_set} instruction pattern @item @samp{stack_protect_set} @@ -212,11 +212,17 @@ int can_conditionally_move_p (enum machine_mode mode); rtx emit_conditional_add (rtx, enum rtx_code, rtx, rtx, enum machine_mode, rtx, rtx, enum machine_mode, int); -rtx expand_val_compare_and_swap (rtx, rtx, rtx, rtx); -rtx expand_bool_compare_and_swap (rtx, rtx, rtx, rtx); rtx expand_sync_operation (rtx, rtx, enum rtx_code); rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx); -rtx expand_sync_lock_test_and_set (rtx, rtx, rtx); + +rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel); +rtx expand_atomic_load (rtx, rtx, enum memmodel); +rtx expand_atomic_store (rtx, rtx, enum memmodel); +rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, + bool); +void expand_atomic_thread_fence (enum memmodel); +void expand_atomic_signal_fence (enum memmodel); + /* Functions from expmed.c: */ @@ -248,6 +254,7 @@ extern void expand_builtin_setjmp_receiver (rtx); extern rtx expand_builtin_saveregs (void); extern void expand_builtin_trap (void); extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode); +extern void expand_builtin_mem_thread_fence (enum memmodel); /* Functions from expr.c: */ diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 5b1d410..b5c8bed 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,16 @@ +2011-11-06 Andrew MacLeod <amacleod@redhat.com> + Aldy Hernandez <aldyh@redhat.com> + + Merged from cxx-mem-model. + + * types.def: (BT_SIZE, BT_CONST_VOLATILE_PTR, BT_FN_VOID_INT, + BT_FN_I{1,2,4,8,16}_CONST_VPTR_INT, BT_FN_VOID_VPTR_INT, + BT_FN_BOOL_VPTR_INT, BT_FN_BOOL_SIZE_CONST_VPTR, + BT_FN_VOID_VPTR_I{1,2,4,8,16}_INT, BT_FN_VOID_SIZE_VPTR_PTR_INT, + BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, + BT_FN_BOOL_VPTR_PTR_I{1,2,4,8,16}_BOOL_INT_INT, + BT_FN_I{1,2,4,8,16}_VPTR_I{1,2,4,8,16}_INT): New types. 
+ 2011-11-04 Mikael Morin <mikael@gcc.gnu.org> PR fortran/43829 diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 5bcdb52..a2762c6 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -57,6 +57,7 @@ DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node) DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node) DEF_PRIMITIVE_TYPE (BT_ULONGLONG, long_long_unsigned_type_node) DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1)) +DEF_PRIMITIVE_TYPE (BT_SIZE, size_type_node) DEF_PRIMITIVE_TYPE (BT_I1, builtin_type_for_size (BITS_PER_UNIT*1, 1)) DEF_PRIMITIVE_TYPE (BT_I2, builtin_type_for_size (BITS_PER_UNIT*2, 1)) @@ -70,7 +71,10 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR, build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_VOLATILE))) - +DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR, + build_pointer_type + (build_qualified_type (void_type_node, + TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST))) DEF_POINTER_TYPE (BT_PTR_LONG, BT_LONG) DEF_POINTER_TYPE (BT_PTR_ULONGLONG, BT_ULONGLONG) DEF_POINTER_TYPE (BT_PTR_PTR, BT_PTR) @@ -85,6 +89,8 @@ DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR, BT_VOID, BT_PTR_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR) +DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT) + DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR) @@ -98,6 +104,21 @@ DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_I4, BT_I4, BT_VOLATILE_PTR, BT_I4) DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_I8, BT_I8, BT_VOLATILE_PTR, BT_I8) DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_I16, BT_I16, BT_VOLATILE_PTR, BT_I16) DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTR, BT_VOID, BT_PTR, BT_PTR) +DEF_FUNCTION_TYPE_2 (BT_FN_I1_CONST_VPTR_INT, BT_I1, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I2_CONST_VPTR_INT, BT_I2, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I4_CONST_VPTR_INT, BT_I4, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I8_CONST_VPTR_INT, BT_I8, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_I16_CONST_VPTR_INT, BT_I16, BT_CONST_VOLATILE_PTR, + BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_VOID_VPTR_INT, BT_VOID, BT_VOLATILE_PTR, BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT) +DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE, + BT_CONST_VOLATILE_PTR) + DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR) @@ -119,15 +140,31 @@ DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_I16, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_I16) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT) +DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_INT, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT) 
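Read as C prototypes, these type codes are easier to follow; for instance, BT_FN_VOID_VPTR_I4_INT just above is the shape of the 4-byte atomic store builtin, roughly as below (the exact typedef spelling is illustrative):

    #include <stdint.h>

    /* BT_VOID fn (BT_VOLATILE_PTR, BT_I4, BT_INT)  */
    void __atomic_store_4 (volatile void *ptr, uint32_t value, int memmodel);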
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR, BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE, + BT_VOLATILE_PTR, BT_PTR, BT_INT) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_VOID, BT_SIZE, + BT_CONST_VOLATILE_PTR, BT_PTR, BT_INT) DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR, BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_PTR_LONG, BT_PTR_LONG) +DEF_FUNCTION_TYPE_5 (BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, BT_VOID, BT_SIZE, + BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT) DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR, BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, @@ -138,6 +175,23 @@ DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG, DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR, BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I1, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I2, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I4, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I8, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT, + BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I16, BT_BOOL, BT_INT, + BT_INT) +DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, BT_BOOL, BT_SIZE, + BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT) DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, diff --git a/gcc/genopinit.c b/gcc/genopinit.c index 4c64842..44eba24 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -243,6 +243,28 @@ static const char * const optabs[] = "set_direct_optab_handler (sync_compare_and_swap_optab, $A, CODE_FOR_$(sync_compare_and_swap$I$a$))", "set_direct_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))", "set_direct_optab_handler (sync_lock_release_optab, $A, CODE_FOR_$(sync_lock_release$I$a$))", + "set_direct_optab_handler (atomic_exchange_optab, $A, CODE_FOR_$(atomic_exchange$I$a$))", + "set_direct_optab_handler (atomic_compare_and_swap_optab, $A, CODE_FOR_$(atomic_compare_and_swap$I$a$))", + "set_direct_optab_handler (atomic_load_optab, $A, CODE_FOR_$(atomic_load$I$a$))", + "set_direct_optab_handler (atomic_store_optab, $A, CODE_FOR_$(atomic_store$I$a$))", + "set_direct_optab_handler (atomic_add_fetch_optab, $A, CODE_FOR_$(atomic_add_fetch$I$a$))", + "set_direct_optab_handler (atomic_sub_fetch_optab, $A, CODE_FOR_$(atomic_sub_fetch$I$a$))", + "set_direct_optab_handler (atomic_and_fetch_optab, $A, CODE_FOR_$(atomic_and_fetch$I$a$))", + "set_direct_optab_handler (atomic_nand_fetch_optab, $A, CODE_FOR_$(atomic_nand_fetch$I$a$))", + "set_direct_optab_handler (atomic_xor_fetch_optab, $A, CODE_FOR_$(atomic_xor_fetch$I$a$))", + "set_direct_optab_handler (atomic_or_fetch_optab, $A, CODE_FOR_$(atomic_or_fetch$I$a$))", + "set_direct_optab_handler (atomic_fetch_add_optab, $A, CODE_FOR_$(atomic_fetch_add$I$a$))", + "set_direct_optab_handler (atomic_fetch_sub_optab, $A, CODE_FOR_$(atomic_fetch_sub$I$a$))", + "set_direct_optab_handler 
(atomic_fetch_and_optab, $A, CODE_FOR_$(atomic_fetch_and$I$a$))", + "set_direct_optab_handler (atomic_fetch_nand_optab, $A, CODE_FOR_$(atomic_fetch_nand$I$a$))", + "set_direct_optab_handler (atomic_fetch_xor_optab, $A, CODE_FOR_$(atomic_fetch_xor$I$a$))", + "set_direct_optab_handler (atomic_fetch_or_optab, $A, CODE_FOR_$(atomic_fetch_or$I$a$))", + "set_direct_optab_handler (atomic_add_optab, $A, CODE_FOR_$(atomic_add$I$a$))", + "set_direct_optab_handler (atomic_sub_optab, $A, CODE_FOR_$(atomic_sub$I$a$))", + "set_direct_optab_handler (atomic_and_optab, $A, CODE_FOR_$(atomic_and$I$a$))", + "set_direct_optab_handler (atomic_nand_optab, $A, CODE_FOR_$(atomic_nand$I$a$))", + "set_direct_optab_handler (atomic_xor_optab, $A, CODE_FOR_$(atomic_xor$I$a$))", + "set_direct_optab_handler (atomic_or_optab, $A, CODE_FOR_$(atomic_or$I$a$))", "set_optab_handler (vec_set_optab, $A, CODE_FOR_$(vec_set$a$))", "set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))", "set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))", diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 05a3493..d8e7ce3 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -4998,7 +4998,7 @@ expand_omp_atomic_store (basic_block load_bb, tree addr) } /* A subroutine of expand_omp_atomic. Attempt to implement the atomic - operation as a __sync_fetch_and_op builtin. INDEX is log2 of the + operation as a __atomic_fetch_op builtin. INDEX is log2 of the size of the data type, and thus usable to find the index of the builtin decl. Returns false if the expression is not of the proper form. */ @@ -5009,13 +5009,14 @@ expand_omp_atomic_fetch_op (basic_block load_bb, { enum built_in_function oldbase, newbase, tmpbase; tree decl, itype, call; - direct_optab optab, oldoptab, newoptab; tree lhs, rhs; basic_block store_bb = single_succ (load_bb); gimple_stmt_iterator gsi; gimple stmt; location_t loc; + enum tree_code code; bool need_old, need_new; + enum machine_mode imode; /* We expect to find the following sequences: @@ -5047,47 +5048,34 @@ expand_omp_atomic_fetch_op (basic_block load_bb, return false; /* Check for one of the supported fetch-op operations. 
*/
- switch (gimple_assign_rhs_code (stmt))
+ code = gimple_assign_rhs_code (stmt);
+ switch (code)
 {
 case PLUS_EXPR:
 case POINTER_PLUS_EXPR:
- oldbase = BUILT_IN_SYNC_FETCH_AND_ADD_N;
- newbase = BUILT_IN_SYNC_ADD_AND_FETCH_N;
- optab = sync_add_optab;
- oldoptab = sync_old_add_optab;
- newoptab = sync_new_add_optab;
+ oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
+ newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
 break;
 case MINUS_EXPR:
- oldbase = BUILT_IN_SYNC_FETCH_AND_SUB_N;
- newbase = BUILT_IN_SYNC_SUB_AND_FETCH_N;
- optab = sync_add_optab;
- oldoptab = sync_old_add_optab;
- newoptab = sync_new_add_optab;
+ oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
+ newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
 break;
 case BIT_AND_EXPR:
- oldbase = BUILT_IN_SYNC_FETCH_AND_AND_N;
- newbase = BUILT_IN_SYNC_AND_AND_FETCH_N;
- optab = sync_and_optab;
- oldoptab = sync_old_and_optab;
- newoptab = sync_new_and_optab;
+ oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
+ newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
 break;
 case BIT_IOR_EXPR:
- oldbase = BUILT_IN_SYNC_FETCH_AND_OR_N;
- newbase = BUILT_IN_SYNC_OR_AND_FETCH_N;
- optab = sync_ior_optab;
- oldoptab = sync_old_ior_optab;
- newoptab = sync_new_ior_optab;
+ oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
+ newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
 break;
 case BIT_XOR_EXPR:
- oldbase = BUILT_IN_SYNC_FETCH_AND_XOR_N;
- newbase = BUILT_IN_SYNC_XOR_AND_FETCH_N;
- optab = sync_xor_optab;
- oldoptab = sync_old_xor_optab;
- newoptab = sync_new_xor_optab;
+ oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
+ newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
 break;
 default:
 return false;
 }
+
 /* Make sure the expression is of the proper form. */
 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
 rhs = gimple_assign_rhs2 (stmt);
@@ -5103,37 +5091,25 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
 if (decl == NULL_TREE)
 return false;
 itype = TREE_TYPE (TREE_TYPE (decl));
+ imode = TYPE_MODE (itype);
- if (need_new)
- {
- /* expand_sync_fetch_operation can always compensate when interested
- in the new value. */
- if (direct_optab_handler (newoptab, TYPE_MODE (itype))
- == CODE_FOR_nothing
- && direct_optab_handler (oldoptab, TYPE_MODE (itype))
- == CODE_FOR_nothing)
- return false;
- }
- else if (need_old)
- {
- /* When interested in the old value, expand_sync_fetch_operation
- can compensate only if the operation is reversible. AND and OR
- are not reversible. */
- if (direct_optab_handler (oldoptab, TYPE_MODE (itype))
- == CODE_FOR_nothing
- && (oldbase == BUILT_IN_SYNC_FETCH_AND_AND_N
- || oldbase == BUILT_IN_SYNC_FETCH_AND_OR_N
- || direct_optab_handler (newoptab, TYPE_MODE (itype))
- == CODE_FOR_nothing))
- return false;
- }
- else if (direct_optab_handler (optab, TYPE_MODE (itype)) == CODE_FOR_nothing)
+ /* We could test all of the various optabs involved, but the fact of the
+ matter is that (with the exception of i486 vs i586 and xadd) all targets
+ that support any atomic operation optab also implement compare-and-swap.
+ Let optabs.c take care of expanding any compare-and-swap loop. */
+ if (!can_compare_and_swap_p (imode))
 return false;
 gsi = gsi_last_bb (load_bb);
 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
- call = build_call_expr_loc (loc, decl, 2, addr,
- fold_convert_loc (loc, itype, rhs));
+
+ /* OpenMP does not imply any barrier-like semantics on its atomic ops.
+ It only requires that the operation happen atomically. Thus we can
+ use the RELAXED memory model.
*/ + call = build_call_expr_loc (loc, decl, 3, addr, + fold_convert_loc (loc, itype, rhs), + build_int_cst (NULL, MEMMODEL_RELAXED)); + if (need_old || need_new) { lhs = need_old ? loaded_val : stored_val; @@ -5182,6 +5158,8 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, edge e; enum built_in_function fncode; + /* ??? We need a non-pointer interface to __atomic_compare_exchange in + order to use the RELAXED memory model effectively. */ fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N + index + 1); cmpxchg = builtin_decl_explicit (fncode); @@ -5190,8 +5168,7 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); itype = TREE_TYPE (TREE_TYPE (cmpxchg)); - if (direct_optab_handler (sync_compare_and_swap_optab, TYPE_MODE (itype)) - == CODE_FOR_nothing) + if (!can_compare_and_swap_p (TYPE_MODE (itype))) return false; /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */ diff --git a/gcc/optabs.c b/gcc/optabs.c index f07381c..163a449 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -7162,43 +7162,25 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, } -/* This is an internal subroutine of the other compare_and_swap expanders. - MEM, OLD_VAL and NEW_VAL are as you'd expect for a compare-and-swap - operation. TARGET is an optional place to store the value result of - the operation. ICODE is the particular instruction to expand. Return - the result of the operation. */ +/* Return true if there is a compare_and_swap pattern. */ -static rtx -expand_val_compare_and_swap_1 (rtx mem, rtx old_val, rtx new_val, - rtx target, enum insn_code icode) +bool +can_compare_and_swap_p (enum machine_mode mode) { - struct expand_operand ops[4]; - enum machine_mode mode = GET_MODE (mem); - - create_output_operand (&ops[0], target, mode); - create_fixed_operand (&ops[1], mem); - /* OLD_VAL and NEW_VAL may have been promoted to a wider mode. - Shrink them if so. */ - create_convert_operand_to (&ops[2], old_val, mode, true); - create_convert_operand_to (&ops[3], new_val, mode, true); - if (maybe_expand_insn (icode, 4, ops)) - return ops[0].value; - return NULL_RTX; -} - -/* Expand a compare-and-swap operation and return its value. */ + enum insn_code icode; -rtx -expand_val_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target) -{ - enum machine_mode mode = GET_MODE (mem); - enum insn_code icode - = direct_optab_handler (sync_compare_and_swap_optab, mode); + /* Check for __sync_compare_and_swap. */ + icode = direct_optab_handler (sync_compare_and_swap_optab, mode); + if (icode != CODE_FOR_nothing) + return true; - if (icode == CODE_FOR_nothing) - return NULL_RTX; + /* Check for __atomic_compare_and_swap. */ + icode = direct_optab_handler (atomic_compare_and_swap_optab, mode); + if (icode != CODE_FOR_nothing) + return true; - return expand_val_compare_and_swap_1 (mem, old_val, new_val, target, icode); + /* No inline compare and swap. */ + return false; } /* Helper function to find the MODE_CC set in a sync_compare_and_swap @@ -7216,58 +7198,6 @@ find_cc_set (rtx x, const_rtx pat, void *data) } } -/* Expand a compare-and-swap operation and store true into the result if - the operation was successful and false otherwise. Return the result. - Unlike other routines, TARGET is not optional. 
*/ - -rtx -expand_bool_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target) -{ - enum machine_mode mode = GET_MODE (mem); - enum insn_code icode; - rtx subtarget, seq, cc_reg; - - /* If the target supports a compare-and-swap pattern that simultaneously - sets some flag for success, then use it. Otherwise use the regular - compare-and-swap and follow that immediately with a compare insn. */ - icode = direct_optab_handler (sync_compare_and_swap_optab, mode); - if (icode == CODE_FOR_nothing) - return NULL_RTX; - - do_pending_stack_adjust (); - do - { - start_sequence (); - subtarget = expand_val_compare_and_swap_1 (mem, old_val, new_val, - NULL_RTX, icode); - cc_reg = NULL_RTX; - if (subtarget == NULL_RTX) - { - end_sequence (); - return NULL_RTX; - } - - if (have_insn_for (COMPARE, CCmode)) - note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); - seq = get_insns (); - end_sequence (); - - /* We might be comparing against an old value. Try again. :-( */ - if (!cc_reg && MEM_P (old_val)) - { - seq = NULL_RTX; - old_val = force_reg (mode, old_val); - } - } - while (!seq); - - emit_insn (seq); - if (cc_reg) - return emit_store_flag_force (target, EQ, cc_reg, const0_rtx, VOIDmode, 0, 1); - else - return emit_store_flag_force (target, EQ, subtarget, old_val, VOIDmode, 1, 1); -} - /* This is a helper function for the other atomic operations. This function emits a loop that contains SEQ that iterates until a compare-and-swap operation at the end succeeds. MEM is the memory to be modified. SEQ is @@ -7281,8 +7211,7 @@ static bool expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) { enum machine_mode mode = GET_MODE (mem); - enum insn_code icode; - rtx label, cmp_reg, subtarget, cc_reg; + rtx label, cmp_reg, success, oldval; /* The loop we want to generate looks like @@ -7290,8 +7219,8 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) label: old_reg = cmp_reg; seq; - cmp_reg = compare-and-swap(mem, old_reg, new_reg) - if (cmp_reg != old_reg) + (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg) + if (success) goto label; Note that we only do the plain load from memory once. Subsequent @@ -7306,331 +7235,539 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq) if (seq) emit_insn (seq); - /* If the target supports a compare-and-swap pattern that simultaneously - sets some flag for success, then use it. Otherwise use the regular - compare-and-swap and follow that immediately with a compare insn. */ - icode = direct_optab_handler (sync_compare_and_swap_optab, mode); - if (icode == CODE_FOR_nothing) + success = NULL_RTX; + oldval = cmp_reg; + if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg, + new_reg, false, MEMMODEL_SEQ_CST, + MEMMODEL_RELAXED)) return false; - subtarget = expand_val_compare_and_swap_1 (mem, old_reg, new_reg, - cmp_reg, icode); - if (subtarget == NULL_RTX) - return false; + if (oldval != cmp_reg) + emit_move_insn (cmp_reg, oldval); - cc_reg = NULL_RTX; - if (have_insn_for (COMPARE, CCmode)) - note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); - if (cc_reg) + /* ??? Mark this jump predicted not taken? */ + emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx, + GET_MODE (success), 1, label); + return true; +} + + +/* This function expands the atomic exchange operation: + atomically store VAL in MEM and return the previous value in MEM. + + MEMMODEL is the memory model variant to use. + TARGET is an option place to stick the return value. 
*/ + +rtx +expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + enum insn_code icode; + rtx last_insn; + + /* If the target supports the exchange directly, great. */ + icode = direct_optab_handler (atomic_exchange_optab, mode); + if (icode != CODE_FOR_nothing) { - cmp_reg = cc_reg; - old_reg = const0_rtx; + struct expand_operand ops[4]; + + create_output_operand (&ops[0], target, mode); + create_fixed_operand (&ops[1], mem); + /* VAL may have been promoted to a wider mode. Shrink it if so. */ + create_convert_operand_to (&ops[2], val, mode, true); + create_integer_operand (&ops[3], model); + if (maybe_expand_insn (icode, 4, ops)) + return ops[0].value; } - else + + /* Legacy sync_lock_test_and_set works the same, but is only defined as an + acquire barrier. If the pattern exists, and the memory model is stronger + than acquire, add a release barrier before the instruction. + The barrier is not needed if sync_lock_test_and_set doesn't exist since + it will expand into a compare-and-swap loop. */ + + icode = direct_optab_handler (sync_lock_test_and_set_optab, mode); + last_insn = get_last_insn (); + if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST || + model == MEMMODEL_RELEASE || + model == MEMMODEL_ACQ_REL)) + expand_builtin_mem_thread_fence (model); + + if (icode != CODE_FOR_nothing) { - if (subtarget != cmp_reg) - emit_move_insn (cmp_reg, subtarget); + struct expand_operand ops[3]; + + create_output_operand (&ops[0], target, mode); + create_fixed_operand (&ops[1], mem); + /* VAL may have been promoted to a wider mode. Shrink it if so. */ + create_convert_operand_to (&ops[2], val, mode, true); + if (maybe_expand_insn (icode, 3, ops)) + return ops[0].value; } - /* ??? Mark this jump predicted not taken? */ - emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, const0_rtx, GET_MODE (cmp_reg), 1, - label); + /* Remove any fence we may have inserted since a compare and swap loop is a + full memory barrier. */ + if (last_insn != get_last_insn ()) + delete_insns_since (last_insn); + + /* Otherwise, use a compare-and-swap loop for the exchange. */ + if (can_compare_and_swap_p (mode)) + { + if (!target || !register_operand (target, mode)) + target = gen_reg_rtx (mode); + if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode) + val = convert_modes (mode, GET_MODE (val), val, 1); + if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX)) + return target; + } + + return NULL_RTX; +} + +/* This function expands the atomic compare exchange operation: + + *PTARGET_BOOL is an optional place to store the boolean success/failure. + *PTARGET_OVAL is an optional place to store the old value from memory. + Both target parameters may be NULL to indicate that we do not care about + that return value. Both target parameters are updated on success to + the actual location of the corresponding result. + + MEMMODEL is the memory model variant to use. + + The return value of the function is true for success. */ + +bool +expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, + rtx mem, rtx expected, rtx desired, + bool is_weak, enum memmodel succ_model, + enum memmodel fail_model) +{ + enum machine_mode mode = GET_MODE (mem); + struct expand_operand ops[8]; + enum insn_code icode; + rtx target_bool, target_oval; + + /* Load expected into a register for the compare and swap. */ + if (MEM_P (expected)) + expected = copy_to_reg (expected); + + /* Make sure we always have some place to put the return oldval. 
+ Further, make sure that place is distinct from the input expected, + just in case we need that path down below. */ + if (ptarget_oval == NULL + || (target_oval = *ptarget_oval) == NULL + || reg_overlap_mentioned_p (expected, target_oval)) + target_oval = gen_reg_rtx (mode); + + icode = direct_optab_handler (atomic_compare_and_swap_optab, mode); + if (icode != CODE_FOR_nothing) + { + enum machine_mode bool_mode = insn_data[icode].operand[0].mode; + + /* Make sure we always have a place for the bool operand. */ + if (ptarget_bool == NULL + || (target_bool = *ptarget_bool) == NULL + || GET_MODE (target_bool) != bool_mode) + target_bool = gen_reg_rtx (bool_mode); + + /* Emit the compare_and_swap. */ + create_output_operand (&ops[0], target_bool, bool_mode); + create_output_operand (&ops[1], target_oval, mode); + create_fixed_operand (&ops[2], mem); + create_convert_operand_to (&ops[3], expected, mode, true); + create_convert_operand_to (&ops[4], desired, mode, true); + create_integer_operand (&ops[5], is_weak); + create_integer_operand (&ops[6], succ_model); + create_integer_operand (&ops[7], fail_model); + expand_insn (icode, 8, ops); + + /* Return success/failure. */ + target_bool = ops[0].value; + target_oval = ops[1].value; + goto success; + } + + /* Otherwise fall back to the original __sync_val_compare_and_swap + which is always seq-cst. */ + icode = direct_optab_handler (sync_compare_and_swap_optab, mode); + if (icode != CODE_FOR_nothing) + { + rtx cc_reg; + + create_output_operand (&ops[0], target_oval, mode); + create_fixed_operand (&ops[1], mem); + create_convert_operand_to (&ops[2], expected, mode, true); + create_convert_operand_to (&ops[3], desired, mode, true); + if (!maybe_expand_insn (icode, 4, ops)) + return false; + + target_oval = ops[0].value; + target_bool = NULL_RTX; + + /* If the caller isn't interested in the boolean return value, + skip the computation of it. */ + if (ptarget_bool == NULL) + goto success; + + /* Otherwise, work out if the compare-and-swap succeeded. */ + cc_reg = NULL_RTX; + if (have_insn_for (COMPARE, CCmode)) + note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); + + target_bool + = (cc_reg + ? emit_store_flag_force (target_bool, EQ, cc_reg, + const0_rtx, VOIDmode, 0, 1) + : emit_store_flag_force (target_bool, EQ, target_oval, + expected, VOIDmode, 1, 1)); + goto success; + } + return false; + + success: + /* Make sure that the oval output winds up where the caller asked. */ + if (ptarget_oval) + *ptarget_oval = target_oval; + if (ptarget_bool) + *ptarget_bool = target_bool; return true; } -/* This function generates the atomic operation MEM CODE= VAL. In this - case, we do not care about any resulting value. Returns NULL if we - cannot generate the operation. */ +/* This function expands the atomic load operation: + return the atomically loaded value in MEM. + + MEMMODEL is the memory model variant to use. + TARGET is an option place to stick the return value. */ rtx -expand_sync_operation (rtx mem, rtx val, enum rtx_code code) +expand_atomic_load (rtx target, rtx mem, enum memmodel model) { enum machine_mode mode = GET_MODE (mem); enum insn_code icode; - rtx insn; - /* Look to see if the target supports the operation directly. */ - switch (code) + /* If the target supports the load directly, great. 
*/ + icode = direct_optab_handler (atomic_load_optab, mode); + if (icode != CODE_FOR_nothing) { - case PLUS: - icode = direct_optab_handler (sync_add_optab, mode); - break; - case IOR: - icode = direct_optab_handler (sync_ior_optab, mode); - break; - case XOR: - icode = direct_optab_handler (sync_xor_optab, mode); - break; - case AND: - icode = direct_optab_handler (sync_and_optab, mode); - break; - case NOT: - icode = direct_optab_handler (sync_nand_optab, mode); - break; + struct expand_operand ops[3]; - case MINUS: - icode = direct_optab_handler (sync_sub_optab, mode); - if (icode == CODE_FOR_nothing || CONST_INT_P (val)) - { - icode = direct_optab_handler (sync_add_optab, mode); - if (icode != CODE_FOR_nothing) - { - val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1); - code = PLUS; - } - } - break; + create_output_operand (&ops[0], target, mode); + create_fixed_operand (&ops[1], mem); + create_integer_operand (&ops[2], model); + if (maybe_expand_insn (icode, 3, ops)) + return ops[0].value; + } - default: - gcc_unreachable (); + /* If the size of the object is greater than word size on this target, + then we assume that a load will not be atomic. */ + if (GET_MODE_PRECISION (mode) > BITS_PER_WORD) + { + /* Issue val = compare_and_swap (mem, 0, 0). + This may cause the occasional harmless store of 0 when the value is + already 0, but it seems to be OK according to the standards guys. */ + expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx, + const0_rtx, false, model, model); + return target; } - /* Generate the direct operation, if present. */ + /* Otherwise assume loads are atomic, and emit the proper barriers. */ + if (!target || target == const0_rtx) + target = gen_reg_rtx (mode); + + /* Emit the appropriate barrier before the load. */ + expand_builtin_mem_thread_fence (model); + + emit_move_insn (target, mem); + + /* For SEQ_CST, also emit a barrier after the load. */ + if (model == MEMMODEL_SEQ_CST) + expand_builtin_mem_thread_fence (model); + + return target; +} + +/* This function expands the atomic store operation: + Atomically store VAL in MEM. + MEMMODEL is the memory model variant to use. + function returns const0_rtx if a pattern was emitted. */ + +rtx +expand_atomic_store (rtx mem, rtx val, enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + enum insn_code icode; + struct expand_operand ops[3]; + + /* If the target supports the store directly, great. */ + icode = direct_optab_handler (atomic_store_optab, mode); if (icode != CODE_FOR_nothing) { - struct expand_operand ops[2]; - create_fixed_operand (&ops[0], mem); - /* VAL may have been promoted to a wider mode. Shrink it if so. */ - create_convert_operand_to (&ops[1], val, mode, true); - if (maybe_expand_insn (icode, 2, ops)) + create_input_operand (&ops[1], val, mode); + create_integer_operand (&ops[2], model); + if (maybe_expand_insn (icode, 3, ops)) return const0_rtx; } - /* Failing that, generate a compare-and-swap loop in which we perform the - operation with normal arithmetic instructions. */ - if (direct_optab_handler (sync_compare_and_swap_optab, mode) - != CODE_FOR_nothing) + /* If the size of the object is greater than word size on this target, + a default store will not be atomic, Try a mem_exchange and throw away + the result. If that doesn't work, don't do anything. 
*/ + if (GET_MODE_PRECISION(mode) > BITS_PER_WORD) { - rtx t0 = gen_reg_rtx (mode), t1; + rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model); + if (target) + return const0_rtx; + else + return NULL_RTX; + } - start_sequence (); + /* If there is no mem_store, default to a move with barriers */ + if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE) + expand_builtin_mem_thread_fence (model); - t1 = t0; - if (code == NOT) - { - t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX, - true, OPTAB_LIB_WIDEN); - t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true); - } - else - t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, - true, OPTAB_LIB_WIDEN); - insn = get_insns (); - end_sequence (); + emit_move_insn (mem, val); - if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn)) - return const0_rtx; - } + /* For SEQ_CST, also emit a barrier after the load. */ + if (model == MEMMODEL_SEQ_CST) + expand_builtin_mem_thread_fence (model); - return NULL_RTX; + return const0_rtx; } -/* This function generates the atomic operation MEM CODE= VAL. In this - case, we do care about the resulting value: if AFTER is true then - return the value MEM holds after the operation, if AFTER is false - then return the value MEM holds before the operation. TARGET is an - optional place for the result value to be stored. */ -rtx -expand_sync_fetch_operation (rtx mem, rtx val, enum rtx_code code, - bool after, rtx target) +/* Structure containing the pointers and values required to process the + various forms of the atomic_fetch_op and atomic_op_fetch builtins. */ + +struct atomic_op_functions { - enum machine_mode mode = GET_MODE (mem); - enum insn_code old_code, new_code, icode; - bool compensate; - rtx insn; + struct direct_optab_d *mem_fetch_before; + struct direct_optab_d *mem_fetch_after; + struct direct_optab_d *mem_no_result; + struct direct_optab_d *fetch_before; + struct direct_optab_d *fetch_after; + struct direct_optab_d *no_result; + enum rtx_code reverse_code; +}; + +static const struct atomic_op_functions * +get_atomic_op_for_code (enum rtx_code code) +{ + static const struct atomic_op_functions add_op = { + atomic_fetch_add_optab, atomic_add_fetch_optab, atomic_add_optab, + sync_old_add_optab, sync_new_add_optab, sync_add_optab, MINUS + }, sub_op = { + atomic_fetch_sub_optab, atomic_sub_fetch_optab, atomic_sub_optab, + sync_old_sub_optab, sync_new_sub_optab, sync_sub_optab, PLUS + }, xor_op = { + atomic_fetch_xor_optab, atomic_xor_fetch_optab, atomic_xor_optab, + sync_old_xor_optab, sync_new_xor_optab, sync_xor_optab, XOR + }, and_op = { + atomic_fetch_and_optab, atomic_and_fetch_optab, atomic_and_optab, + sync_old_and_optab, sync_new_and_optab, sync_and_optab, UNKNOWN + }, nand_op = { + atomic_fetch_nand_optab, atomic_nand_fetch_optab, atomic_nand_optab, + sync_old_nand_optab, sync_new_nand_optab, sync_nand_optab, UNKNOWN + }, ior_op = { + atomic_fetch_or_optab, atomic_or_fetch_optab, atomic_or_optab, + sync_old_ior_optab, sync_new_ior_optab, sync_ior_optab, UNKNOWN + }; - /* Look to see if the target supports the operation directly. 
*/ switch (code) { case PLUS: - old_code = direct_optab_handler (sync_old_add_optab, mode); - new_code = direct_optab_handler (sync_new_add_optab, mode); - break; - case IOR: - old_code = direct_optab_handler (sync_old_ior_optab, mode); - new_code = direct_optab_handler (sync_new_ior_optab, mode); - break; + return &add_op; + case MINUS: + return &sub_op; case XOR: - old_code = direct_optab_handler (sync_old_xor_optab, mode); - new_code = direct_optab_handler (sync_new_xor_optab, mode); - break; + return &xor_op; case AND: - old_code = direct_optab_handler (sync_old_and_optab, mode); - new_code = direct_optab_handler (sync_new_and_optab, mode); - break; + return &and_op; + case IOR: + return &ior_op; case NOT: - old_code = direct_optab_handler (sync_old_nand_optab, mode); - new_code = direct_optab_handler (sync_new_nand_optab, mode); - break; - - case MINUS: - old_code = direct_optab_handler (sync_old_sub_optab, mode); - new_code = direct_optab_handler (sync_new_sub_optab, mode); - if ((old_code == CODE_FOR_nothing && new_code == CODE_FOR_nothing) - || CONST_INT_P (val)) - { - old_code = direct_optab_handler (sync_old_add_optab, mode); - new_code = direct_optab_handler (sync_new_add_optab, mode); - if (old_code != CODE_FOR_nothing || new_code != CODE_FOR_nothing) - { - val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1); - code = PLUS; - } - } - break; - + return &nand_op; default: gcc_unreachable (); } +} + +/* Try to emit an instruction for a specific operation varaition. + OPTAB contains the OP functions. + TARGET is an optional place to return the result. const0_rtx means unused. + MEM is the memory location to operate on. + VAL is the value to use in the operation. + USE_MEMMODEL is TRUE if the variation with a memory model should be tried. + MODEL is the memory model, if used. + AFTER is true if the returned result is the value after the operation. */ + +static rtx +maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem, + rtx val, bool use_memmodel, enum memmodel model, bool after) +{ + enum machine_mode mode = GET_MODE (mem); + struct direct_optab_d *this_optab; + struct expand_operand ops[4]; + enum insn_code icode; + int op_counter = 0; + int num_ops; - /* If the target does supports the proper new/old operation, great. But - if we only support the opposite old/new operation, check to see if we - can compensate. In the case in which the old value is supported, then - we can always perform the operation again with normal arithmetic. In - the case in which the new value is supported, then we can only handle - this in the case the operation is reversible. */ - compensate = false; - if (after) + /* Check to see if there is a result returned. */ + if (target == const0_rtx) { - icode = new_code; - if (icode == CODE_FOR_nothing) - { - icode = old_code; - if (icode != CODE_FOR_nothing) - compensate = true; + if (use_memmodel) + { + this_optab = optab->mem_no_result; + create_integer_operand (&ops[2], model); + num_ops = 3; + } + else + { + this_optab = optab->no_result; + num_ops = 2; } } + /* Otherwise, we need to generate a result. */ else { - icode = old_code; - if (icode == CODE_FOR_nothing - && (code == PLUS || code == MINUS || code == XOR)) + if (use_memmodel) + { + this_optab = after ? optab->mem_fetch_after : optab->mem_fetch_before; + create_integer_operand (&ops[3], model); + num_ops= 4; + } + else { - icode = new_code; - if (icode != CODE_FOR_nothing) - compensate = true; + this_optab = after ? 
optab->fetch_after : optab->fetch_before; + num_ops = 3; } + create_output_operand (&ops[op_counter++], target, mode); } - /* If we found something supported, great. */ - if (icode != CODE_FOR_nothing) + icode = direct_optab_handler (this_optab, mode); + if (icode == CODE_FOR_nothing) + return NULL_RTX; + + create_fixed_operand (&ops[op_counter++], mem); + /* VAL may have been promoted to a wider mode. Shrink it if so. */ + create_convert_operand_to (&ops[op_counter++], val, mode, true); + + if (maybe_expand_insn (icode, num_ops, ops)) + return ((target == const0_rtx) ? const0_rtx : ops[0].value); + + return NULL_RTX; +} + + +/* This function expands an atomic fetch_OP or OP_fetch operation: + TARGET is an option place to stick the return value. const0_rtx indicates + the result is unused. + atomically fetch MEM, perform the operation with VAL and return it to MEM. + CODE is the operation being performed (OP) + MEMMODEL is the memory model variant to use. + AFTER is true to return the result of the operation (OP_fetch). + AFTER is false to return the value before the operation (fetch_OP). */ +rtx +expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code, + enum memmodel model, bool after) +{ + enum machine_mode mode = GET_MODE (mem); + const struct atomic_op_functions *optab; + rtx result; + bool unused_result = (target == const0_rtx); + + optab = get_atomic_op_for_code (code); + + /* Check for the case where the result isn't used and try those patterns. */ + if (unused_result) { - struct expand_operand ops[3]; + /* Try the memory model variant first. */ + result = maybe_emit_op (optab, target, mem, val, true, model, true); + if (result) + return result; - create_output_operand (&ops[0], target, mode); - create_fixed_operand (&ops[1], mem); - /* VAL may have been promoted to a wider mode. Shrink it if so. */ - create_convert_operand_to (&ops[2], val, mode, true); - if (maybe_expand_insn (icode, 3, ops)) - { - target = ops[0].value; - val = ops[2].value; - /* If we need to compensate for using an operation with the - wrong return value, do so now. */ - if (compensate) - { - if (!after) - { - if (code == PLUS) - code = MINUS; - else if (code == MINUS) - code = PLUS; - } + /* Next try the old style withuot a memory model. */ + result = maybe_emit_op (optab, target, mem, val, false, model, true); + if (result) + return result; - if (code == NOT) - { - target = expand_simple_binop (mode, AND, target, val, - NULL_RTX, true, - OPTAB_LIB_WIDEN); - target = expand_simple_unop (mode, code, target, - NULL_RTX, true); - } - else - target = expand_simple_binop (mode, code, target, val, - NULL_RTX, true, - OPTAB_LIB_WIDEN); - } + /* There is no no-result pattern, so try patterns with a result. */ + target = NULL_RTX; + } - return target; + /* Try the __atomic version. */ + result = maybe_emit_op (optab, target, mem, val, true, model, after); + if (result) + return result; + + /* Try the older __sync version. */ + result = maybe_emit_op (optab, target, mem, val, false, model, after); + if (result) + return result; + + /* If the fetch value can be calculated from the other variation of fetch, + try that operation. */ + if (after || optab->reverse_code != UNKNOWN || target == const0_rtx) + { + /* Try the __atomic version, then the older __sync version. 
*/ + result = maybe_emit_op (optab, target, mem, val, true, model, !after); + if (!result) + result = maybe_emit_op (optab, target, mem, val, false, model, !after); + + if (result) + { + /* If the result isn't used, no need to do compensation code. */ + if (unused_result) + return target; + + /* Issue compensation code. Fetch_after == fetch_before OP val. + Fetch_before == after REVERSE_OP val. */ + if (!after) + code = optab->reverse_code; + result = expand_simple_binop (mode, code, result, val, NULL_RTX, true, + OPTAB_LIB_WIDEN); + return result; } } - /* Failing that, generate a compare-and-swap loop in which we perform the - operation with normal arithmetic instructions. */ - if (direct_optab_handler (sync_compare_and_swap_optab, mode) - != CODE_FOR_nothing) + /* If nothing else has succeeded, default to a compare and swap loop. */ + if (can_compare_and_swap_p (mode)) { + rtx insn; rtx t0 = gen_reg_rtx (mode), t1; - if (!target || !register_operand (target, mode)) - target = gen_reg_rtx (mode); - start_sequence (); - if (!after) - emit_move_insn (target, t0); + /* If the result is used, get a register for it. */ + if (!unused_result) + { + if (!target || !register_operand (target, mode)) + target = gen_reg_rtx (mode); + /* If fetch_before, copy the value now. */ + if (!after) + emit_move_insn (target, t0); + } + else + target = const0_rtx; + t1 = t0; if (code == NOT) - { + { t1 = expand_simple_binop (mode, AND, t1, val, NULL_RTX, true, OPTAB_LIB_WIDEN); t1 = expand_simple_unop (mode, code, t1, NULL_RTX, true); } else - t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, - true, OPTAB_LIB_WIDEN); - if (after) - emit_move_insn (target, t1); + t1 = expand_simple_binop (mode, code, t1, val, NULL_RTX, true, + OPTAB_LIB_WIDEN); + /* For after, copy the value now. */ + if (!unused_result && after) + emit_move_insn (target, t1); insn = get_insns (); end_sequence (); if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn)) - return target; - } - - return NULL_RTX; -} - -/* This function expands a test-and-set operation. Ideally we atomically - store VAL in MEM and return the previous value in MEM. Some targets - may not support this operation and only support VAL with the constant 1; - in this case while the return value will be 0/1, but the exact value - stored in MEM is target defined. TARGET is an option place to stick - the return value. */ - -rtx -expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target) -{ - enum machine_mode mode = GET_MODE (mem); - enum insn_code icode; - - /* If the target supports the test-and-set directly, great. */ - icode = direct_optab_handler (sync_lock_test_and_set_optab, mode); - if (icode != CODE_FOR_nothing) - { - struct expand_operand ops[3]; - - create_output_operand (&ops[0], target, mode); - create_fixed_operand (&ops[1], mem); - /* VAL may have been promoted to a wider mode. Shrink it if so. */ - create_convert_operand_to (&ops[2], val, mode, true); - if (maybe_expand_insn (icode, 3, ops)) - return ops[0].value; - } - - /* Otherwise, use a compare-and-swap loop for the exchange. 
*/ - if (direct_optab_handler (sync_compare_and_swap_optab, mode) - != CODE_FOR_nothing) - { - if (!target || !register_operand (target, mode)) - target = gen_reg_rtx (mode); - if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode) - val = convert_modes (mode, GET_MODE (val), val, 1); - if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX)) - return target; + return target; } return NULL_RTX; @@ -7838,6 +7975,14 @@ maybe_gen_insn (enum insn_code icode, unsigned int nops, case 6: return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value, ops[3].value, ops[4].value, ops[5].value); + case 7: + return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value, + ops[3].value, ops[4].value, ops[5].value, + ops[6].value); + case 8: + return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value, + ops[3].value, ops[4].value, ops[5].value, + ops[6].value, ops[7].value); } gcc_unreachable (); } diff --git a/gcc/optabs.h b/gcc/optabs.h index 8357a29..d70b3fa 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -695,6 +695,34 @@ enum direct_optab_index /* Atomic clear with release semantics. */ DOI_sync_lock_release, + /* Atomic operations with memory model parameters. */ + DOI_atomic_exchange, + DOI_atomic_compare_and_swap, + DOI_atomic_load, + DOI_atomic_store, + DOI_atomic_add_fetch, + DOI_atomic_sub_fetch, + DOI_atomic_and_fetch, + DOI_atomic_nand_fetch, + DOI_atomic_xor_fetch, + DOI_atomic_or_fetch, + DOI_atomic_fetch_add, + DOI_atomic_fetch_sub, + DOI_atomic_fetch_and, + DOI_atomic_fetch_nand, + DOI_atomic_fetch_xor, + DOI_atomic_fetch_or, + DOI_atomic_add, + DOI_atomic_sub, + DOI_atomic_and, + DOI_atomic_nand, + DOI_atomic_xor, + DOI_atomic_or, + DOI_atomic_always_lock_free, + DOI_atomic_is_lock_free, + DOI_atomic_thread_fence, + DOI_atomic_signal_fence, + /* Vector permutation. 
*/ DOI_vec_perm, DOI_vec_perm_const, @@ -744,6 +772,60 @@ typedef struct direct_optab_d *direct_optab; (&direct_optab_table[(int) DOI_sync_lock_test_and_set]) #define sync_lock_release_optab \ (&direct_optab_table[(int) DOI_sync_lock_release]) + +#define atomic_exchange_optab \ + (&direct_optab_table[(int) DOI_atomic_exchange]) +#define atomic_compare_and_swap_optab \ + (&direct_optab_table[(int) DOI_atomic_compare_and_swap]) +#define atomic_load_optab \ + (&direct_optab_table[(int) DOI_atomic_load]) +#define atomic_store_optab \ + (&direct_optab_table[(int) DOI_atomic_store]) +#define atomic_add_fetch_optab \ + (&direct_optab_table[(int) DOI_atomic_add_fetch]) +#define atomic_sub_fetch_optab \ + (&direct_optab_table[(int) DOI_atomic_sub_fetch]) +#define atomic_and_fetch_optab \ + (&direct_optab_table[(int) DOI_atomic_and_fetch]) +#define atomic_nand_fetch_optab \ + (&direct_optab_table[(int) DOI_atomic_nand_fetch]) +#define atomic_xor_fetch_optab \ + (&direct_optab_table[(int) DOI_atomic_xor_fetch]) +#define atomic_or_fetch_optab \ + (&direct_optab_table[(int) DOI_atomic_or_fetch]) +#define atomic_fetch_add_optab \ + (&direct_optab_table[(int) DOI_atomic_fetch_add]) +#define atomic_fetch_sub_optab \ + (&direct_optab_table[(int) DOI_atomic_fetch_sub]) +#define atomic_fetch_and_optab \ + (&direct_optab_table[(int) DOI_atomic_fetch_and]) +#define atomic_fetch_nand_optab \ + (&direct_optab_table[(int) DOI_atomic_fetch_nand]) +#define atomic_fetch_xor_optab \ + (&direct_optab_table[(int) DOI_atomic_fetch_xor]) +#define atomic_fetch_or_optab \ + (&direct_optab_table[(int) DOI_atomic_fetch_or]) +#define atomic_add_optab \ + (&direct_optab_table[(int) DOI_atomic_add]) +#define atomic_sub_optab \ + (&direct_optab_table[(int) DOI_atomic_sub]) +#define atomic_and_optab \ + (&direct_optab_table[(int) DOI_atomic_and]) +#define atomic_nand_optab \ + (&direct_optab_table[(int) DOI_atomic_nand]) +#define atomic_xor_optab \ + (&direct_optab_table[(int) DOI_atomic_xor]) +#define atomic_or_optab \ + (&direct_optab_table[(int) DOI_atomic_or]) +#define atomic_always_lock_free_optab \ + (&direct_optab_table[(int) DOI_atomic_always_lock_free]) +#define atomic_is_lock_free_optab \ + (&direct_optab_table[(int) DOI_atomic_is_lock_free]) +#define atomic_thread_fence_optab \ + (&direct_optab_table[(int) DOI_atomic_thread_fence]) +#define atomic_signal_fence_optab \ + (&direct_optab_table[(int) DOI_atomic_signal_fence]) + #define vec_perm_optab (&direct_optab_table[DOI_vec_perm]) #define vec_perm_const_optab (&direct_optab_table[(int) DOI_vec_perm_const]) @@ -883,6 +965,13 @@ extern void expand_float (rtx, rtx, int); /* Return the insn_code for a FLOAT_EXPR. */ enum insn_code can_float_p (enum machine_mode, enum machine_mode, int); +/* Return true if there is an inline compare and swap pattern. */ +extern bool can_compare_and_swap_p (enum machine_mode); + +/* Generate code for a compare and swap. */ +extern bool expand_atomic_compare_and_swap (rtx *, rtx *, rtx, rtx, rtx, bool, + enum memmodel, enum memmodel); + /* Check whether an operation represented by the code CODE is a convert operation that is supported by the target platform in vector form */ diff --git a/gcc/params.def b/gcc/params.def index fa63232..a7ae091 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -921,11 +921,26 @@ DEFPARAM (PARAM_CASE_VALUES_THRESHOLD, 0, 0, 0) /* Data race flags for C++0x memory model compliance. 
*/ +DEFPARAM (PARAM_ALLOW_LOAD_DATA_RACES, + "allow-load-data-races", + "Allow new data races on loads to be introduced", + 1, 0, 1) + DEFPARAM (PARAM_ALLOW_STORE_DATA_RACES, "allow-store-data-races", "Allow new data races on stores to be introduced", 1, 0, 1) +DEFPARAM (PARAM_ALLOW_PACKED_LOAD_DATA_RACES, + "allow-packed-load-data-races", + "Allow new data races on packed data loads to be introduced", + 1, 0, 1) + +DEFPARAM (PARAM_ALLOW_PACKED_STORE_DATA_RACES, + "allow-packed-store-data-races", + "Allow new data races on packed data stores to be introduced", + 1, 0, 1) + /* Reassociation width to be used by tree reassoc optimization. */ DEFPARAM (PARAM_TREE_REASSOC_WIDTH, "tree-reassoc-width", diff --git a/gcc/params.h b/gcc/params.h index 783f3b3..0bf8961 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -211,6 +211,13 @@ extern void init_param_values (int *params); PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID) #define MAX_STORES_TO_SINK \ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) +#define ALLOW_LOAD_DATA_RACES \ + PARAM_VALUE (PARAM_ALLOW_LOAD_DATA_RACES) #define ALLOW_STORE_DATA_RACES \ PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) +#define ALLOW_PACKED_LOAD_DATA_RACES \ + PARAM_VALUE (PARAM_ALLOW_PACKED_LOAD_DATA_RACES) +#define ALLOW_PACKED_STORE_DATA_RACES \ + PARAM_VALUE (PARAM_ALLOW_PACKED_STORE_DATA_RACES) + #endif /* ! GCC_PARAMS_H */ diff --git a/gcc/sync-builtins.def b/gcc/sync-builtins.def index 731d4a2..1a2df9a 100644 --- a/gcc/sync-builtins.def +++ b/gcc/sync-builtins.def @@ -256,3 +256,341 @@ DEF_SYNC_BUILTIN (BUILT_IN_SYNC_LOCK_RELEASE_16, "__sync_lock_release_16", DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE, "__sync_synchronize", BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) + +/* __sync* builtins for the C++ memory model. */ + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE, + "__atomic_exchange", + BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_N, + "__atomic_exchange_n", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_1, + "__atomic_exchange_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_2, + "__atomic_exchange_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_4, + "__atomic_exchange_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_8, + "__atomic_exchange_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_EXCHANGE_16, + "__atomic_exchange_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD, + "__atomic_load", + BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_N, + "__atomic_load_n", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_1, + "__atomic_load_1", + BT_FN_I1_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_2, + "__atomic_load_2", + BT_FN_I2_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_4, + "__atomic_load_4", + BT_FN_I4_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_8, + "__atomic_load_8", + BT_FN_I8_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_LOAD_16, + "__atomic_load_16", + BT_FN_I16_CONST_VPTR_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE, + "__atomic_compare_exchange", + BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, + 
ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N, + "__atomic_compare_exchange_n", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1, + "__atomic_compare_exchange_1", + BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2, + "__atomic_compare_exchange_2", + BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4, + "__atomic_compare_exchange_4", + BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8, + "__atomic_compare_exchange_8", + BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16, + "__atomic_compare_exchange_16", + BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE, + "__atomic_store", + BT_FN_VOID_SIZE_VPTR_PTR_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_N, + "__atomic_store_n", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_1, + "__atomic_store_1", + BT_FN_VOID_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_2, + "__atomic_store_2", + BT_FN_VOID_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_4, + "__atomic_store_4", + BT_FN_VOID_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_8, + "__atomic_store_8", + BT_FN_VOID_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_STORE_16, + "__atomic_store_16", + BT_FN_VOID_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_N, + "__atomic_add_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_1, + "__atomic_add_fetch_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_2, + "__atomic_add_fetch_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_4, + "__atomic_add_fetch_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_8, + "__atomic_add_fetch_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ADD_FETCH_16, + "__atomic_add_fetch_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_N, + "__atomic_sub_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_1, + "__atomic_sub_fetch_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_2, + "__atomic_sub_fetch_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_4, + "__atomic_sub_fetch_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_8, + "__atomic_sub_fetch_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SUB_FETCH_16, + "__atomic_sub_fetch_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_N, + "__atomic_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_1, + "__atomic_and_fetch_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_2, + "__atomic_and_fetch_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) 
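The size-specific variants continue below. Both orderings of every operation are exposed because, as the optabs.c changes above describe, expand_atomic_fetch_op can synthesize one form from the other: for a reversible operation the OP_fetch result is the fetch_OP result with the operation applied once more, and the reverse_code field (e.g. MINUS for add) recovers fetch_OP from OP_fetch. A minimal user-level illustration in C, with invented variable names:

/* Sketch of the identity the expander relies on; not code from the patch.
   add_fetch (p, v) == fetch_add (p, v) + v for the same starting value.  */
#include <assert.h>

int demo (void)
{
  int x = 10, y = 10;

  int new_val = __atomic_add_fetch (&x, 5, __ATOMIC_SEQ_CST);  /* 15 */
  int old_val = __atomic_fetch_add (&y, 5, __ATOMIC_SEQ_CST);  /* 10 */

  assert (new_val == old_val + 5);  /* the compensation the expander emits */
  return new_val;
}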
+DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_4, + "__atomic_and_fetch_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_8, + "__atomic_and_fetch_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_AND_FETCH_16, + "__atomic_and_fetch_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_N, + "__atomic_nand_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_1, + "__atomic_nand_fetch_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_2, + "__atomic_nand_fetch_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_4, + "__atomic_nand_fetch_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_8, + "__atomic_nand_fetch_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_NAND_FETCH_16, + "__atomic_nand_fetch_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_N, + "__atomic_xor_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_1, + "__atomic_xor_fetch_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_2, + "__atomic_xor_fetch_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_4, + "__atomic_xor_fetch_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_8, + "__atomic_xor_fetch_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_XOR_FETCH_16, + "__atomic_xor_fetch_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_N, + "__atomic_or_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_1, + "__atomic_or_fetch_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_2, + "__atomic_or_fetch_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_4, + "__atomic_or_fetch_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_8, + "__atomic_or_fetch_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_OR_FETCH_16, + "__atomic_or_fetch_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_N, + "__atomic_fetch_add", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_1, + "__atomic_fetch_add_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_2, + "__atomic_fetch_add_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_4, + "__atomic_fetch_add_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_8, + "__atomic_fetch_add_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_ADD_16, + "__atomic_fetch_add_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_N, + "__atomic_fetch_sub", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_1, + "__atomic_fetch_sub_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN 
(BUILT_IN_ATOMIC_FETCH_SUB_2, + "__atomic_fetch_sub_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_4, + "__atomic_fetch_sub_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_8, + "__atomic_fetch_sub_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_SUB_16, + "__atomic_fetch_sub_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_N, + "__atomic_fetch_and", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_1, + "__atomic_fetch_and_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_2, + "__atomic_fetch_and_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_4, + "__atomic_fetch_and_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_8, + "__atomic_fetch_and_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_AND_16, + "__atomic_fetch_and_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_N, + "__atomic_fetch_nand", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_1, + "__atomic_fetch_nand_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_2, + "__atomic_fetch_nand_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_4, + "__atomic_fetch_nand_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_8, + "__atomic_fetch_nand_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_NAND_16, + "__atomic_fetch_nand_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_N, + "__atomic_fetch_xor", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_1, + "__atomic_fetch_xor_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_2, + "__atomic_fetch_xor_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_4, + "__atomic_fetch_xor_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_8, + "__atomic_fetch_xor_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_XOR_16, + "__atomic_fetch_xor_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_N, + "__atomic_fetch_or", + BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_1, + "__atomic_fetch_or_1", + BT_FN_I1_VPTR_I1_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_2, + "__atomic_fetch_or_2", + BT_FN_I2_VPTR_I2_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_4, + "__atomic_fetch_or_4", + BT_FN_I4_VPTR_I4_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_8, + "__atomic_fetch_or_8", + BT_FN_I8_VPTR_I8_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_FETCH_OR_16, + "__atomic_fetch_or_16", + BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_ALWAYS_LOCK_FREE, + "__atomic_always_lock_free", + BT_FN_BOOL_SIZE_CONST_VPTR, ATTR_CONST_NOTHROW_LEAF_LIST) + 
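The companion query __atomic_is_lock_free and the two fence builtins follow immediately below. The difference between the two lock-free queries is that the "always" form is expected to fold to a compile-time constant, while the "is" form may depend on the object's actual address and can fall back to an external library routine. A minimal usage sketch in C; the variable names are invented and the answers are target-dependent:

/* Sketch only: typical use of the query and fence builtins defined in
   this block.  */
#include <stddef.h>
#include <stdbool.h>

int flag;

bool queries (void)
{
  /* Expected to resolve at compile time.  */
  bool always = __atomic_always_lock_free (sizeof (int), 0);

  /* May consider the pointer's alignment and may become a runtime call.  */
  bool now = __atomic_is_lock_free (sizeof (int), &flag);

  /* Full thread fence: compiler barrier plus whatever hardware barrier
     the memory model requires.  */
  __atomic_thread_fence (__ATOMIC_SEQ_CST);

  /* Signal fence: orders against a handler on the same thread, so only a
     compiler barrier is needed (compare the mem_signal_fence pattern
     documented at the top of this section).  */
  __atomic_signal_fence (__ATOMIC_SEQ_CST);

  return always || now;
}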
+DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_IS_LOCK_FREE, + "__atomic_is_lock_free", + BT_FN_BOOL_SIZE_CONST_VPTR, ATTR_CONST_NOTHROW_LEAF_LIST) + + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_THREAD_FENCE, + "__atomic_thread_fence", + BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ATOMIC_SIGNAL_FENCE, + "__atomic_signal_fence", + BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST) + diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3b75995..aa0b7b6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,67 @@ +2011-11-06 Andrew MacLeod <amacleod@redhat.com> + Richard Henderson <rth@redhat.com> + Aldy Hernandez <aldyh@redhat.com> + + Merged from cxx-mem-model. + + * lib/target-supports.exp (check_effective_target_sync_int_128, + check_effective_target_sync_long_long): Check whether the target + supports 64 and 128 bit __sync builtins. + (check_effective_target_cas_char): New. + (check_effective_target_cas_int): New. + * gcc.dg/dg.exp: Exclude simulate-thread tests. + * gcc.dg/atomic-noinline[-aux].c: New. Make a variety of atomics calls. + * gcc.dg/atomic-generic[-aux].c: New. Test that generic functions + produce the expected library calls. + * gcc.dg/atomic-fence.c: New functional tests. + * gcc.dg/atomic-param.c: New. Checl for illegal number of parameters. + * gcc.dg/atomic-invalid.c: New. Test invalid parameters. + * gcc.dg/atomic-lockfree[-aux].c: New tests. + * gcc.dg/atomic-compare-exchange-{1-5}.c: New functional tests. + * gcc.dg/atomic-op-[1-5].c: New. Test atomic fetch functionality. + * gcc.dg/atomic-exchange-{1-5}.c: New functional tests. + * gcc.dg/atomic-load-{1-5}.c: New functional tests. + * gcc.dg/atomic-store-{1-5}.c: New functional tests. + * gcc.dg/simulate-thread/atomic-load-int128.c: New. Verify int128 loads + are atomic. + * gcc.dg/simulate-thread/atomic-load-longlong.c: New. Verify 8 byte + loads are atomic. + * gcc.dg/simulate-thread/atomic-load-int.c: New. Verify 4 byte loads + are atomic. + * gcc.dg/simulate-thread/atomic-load-short.c: New. Verify 2 byte loads + are atomic. + * gcc.dg/simulate-thread/atomic-other-int128.c: New. Verify other + int128 operations are atomic. + * gcc.dg/simulate-thread/atomic-other-int.c: New. Verify other 4 byte + operations are atomic. + * gcc.dg/simulate-thread/atomic-other-longlong.c: New. Verify 8 byte + operations are atomic. + * gcc.dg/simulate-thread/atomic-other-short.c: New. Verify other 2 byte + operations are atomic. + * gcc.dg/simulate-thread/speculative-store.c: New. Verify speculative + stores aren't moved out of a loop. + * gcc.dg/simulate-thread/strict-align-global.c: New. Verify small + globals don't overwrite neighbouring globals. + * gcc.dg/simulate-thread/subfields.c: New. Verify struct component + writes dont overwrite neighbouring components. + * c-c++-common/gomp/atomic-10.c: Use cas_int; match __atomic builtin. + * c-c++-common/gomp/atomic-3.c: Likewise. + * c-c++-common/gomp/atomic-9.c: Likewise. + * gcc.dg/gomp/atomic-1.c, gcc.dg/gomp/atomic-2.c, + gcc.dg/gomp/atomic-3.c, gcc.dg/gomp/atomic-4.c, gcc.dg/gomp/atomic-7.c, + gcc.dg/gomp/atomic-8.c, gcc.dg/gomp/atomic-9.c, + gcc.dg/gomp/atomic-10.c, gcc.dg/gomp/atomic-12.c, + gcc.dg/gomp/atomic-13.c, gcc.dg/gomp/atomic-14.c, + gcc.dg/gomp/atomic-15.c: Move to c-c++-common/gomp/. 
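The ChangeLog entries above describe moving the gomp atomic tests to c-c++-common and gating them on the new cas_int / cas_char effective-target checks. The pattern those relocated tests now follow is roughly the skeleton below, a hypothetical test shown only for illustration and mirroring the atomic-9.c and atomic-10.c hunks later in this patch:

/* Hypothetical skeleton, not part of the patch: shows how the relocated
   gomp atomic tests gate on the new cas_int effective target instead of
   a hard-coded target list.  */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */

int x;

void
f (void)
{
  #pragma omp atomic
  x += 1;
}

/* Expansion now emits the __atomic form, so the scan needs no target
   selector.  */
/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 1 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */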
+ * g++.dg/gomp/atomic-1.C, g++.dg/gomp/atomic-2.C, + g++.dg/gomp/atomic-3.C, g++.dg/gomp/atomic-4.C, g++.dg/gomp/atomic-7.C, + g++.dg/gomp/atomic-8.C, g++.dg/gomp/atomic-9.C, + g++.dg/gomp/atomic-10.C, g++.dg/gomp/atomic-11.C, + g++.dg/gomp/atomic-12.C, g++.dg/gomp/atomic-13.C, + g++.dg/gomp/atomic-15.C: Remove. + * gcc.dg/gomp/gomp.exp, g++.dg/gomp/gomp.exp: Run c-c++-common tests. + * gcc.dg/gomp/atomic-11.c: Remove test. + 2011-11-06 Ira Rosen <ira.rosen@linaro.org> * gcc.dg/vect/bb-slp-cond-1.c: New test. diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-1.c b/gcc/testsuite/c-c++-common/gomp/atomic-1.c index 3e4bc56..3e4bc56 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-1.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-1.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-10.c b/gcc/testsuite/c-c++-common/gomp/atomic-10.c index 936d0c1..21d035e 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-10.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-10.c @@ -1,6 +1,7 @@ /* PR middle-end/28046 */ /* { dg-do compile } */ /* { dg-options "-fopenmp -fdump-tree-ompexp" } */ +/* { dg-require-effective-target cas_int } */ int a[3], b; struct C { int x; int y; } c; @@ -20,5 +21,5 @@ foo (void) *baz () += bar (); } -/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 4 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */ +/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 4 "ompexp" } } */ /* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-12.c b/gcc/testsuite/c-c++-common/gomp/atomic-12.c index 618c4c8..618c4c8 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-12.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-12.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-13.c b/gcc/testsuite/c-c++-common/gomp/atomic-13.c index 0146825..0146825 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-13.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-13.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-14.c b/gcc/testsuite/c-c++-common/gomp/atomic-14.c index f8fc9d8..f8fc9d8 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-14.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-14.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-15.c b/gcc/testsuite/c-c++-common/gomp/atomic-15.c index 13a9e0c..13a9e0c 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-15.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-15.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-2.c b/gcc/testsuite/c-c++-common/gomp/atomic-2.c index 720ec9e..720ec9e 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-2.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-2.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-3.c b/gcc/testsuite/c-c++-common/gomp/atomic-3.c index 7ea792d..5b9e60c 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-3.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fopenmp -fdump-tree-ompexp" } */ +/* { dg-require-effective-target cas_int } */ int *xyzzy; @@ -9,5 +10,5 @@ void f1(void) xyzzy++; } -/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */ +/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" } } */ /* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-4.c b/gcc/testsuite/c-c++-common/gomp/atomic-4.c index 7f27370..7f27370 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-4.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-4.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-7.c b/gcc/testsuite/c-c++-common/gomp/atomic-7.c 
index 612e97f..612e97f 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-7.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-7.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-8.c b/gcc/testsuite/c-c++-common/gomp/atomic-8.c index 2f04151..2f04151 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-8.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-8.c diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-9.c b/gcc/testsuite/c-c++-common/gomp/atomic-9.c index 2fafbd4..ff5cb40 100644 --- a/gcc/testsuite/gcc.dg/gomp/atomic-9.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-9.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fopenmp -fdump-tree-ompexp" } */ +/* { dg-require-effective-target cas_int } */ volatile int *bar(void); @@ -9,5 +10,5 @@ void f1(void) *bar() += 1; } -/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */ +/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 1 "ompexp" } } */ /* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/g++.dg/dg.exp b/gcc/testsuite/g++.dg/dg.exp index 5851dcc..cee19d6 100644 --- a/gcc/testsuite/g++.dg/dg.exp +++ b/gcc/testsuite/g++.dg/dg.exp @@ -48,6 +48,7 @@ set tests [prune $tests $srcdir/$subdir/tree-prof/*] set tests [prune $tests $srcdir/$subdir/torture/*] set tests [prune $tests $srcdir/$subdir/graphite/*] set tests [prune $tests $srcdir/$subdir/guality/*] +set tests [prune $tests $srcdir/$subdir/simulate-thread/*] # Main loop. dg-runtest $tests "" $DEFAULT_CXXFLAGS diff --git a/gcc/testsuite/g++.dg/gomp/atomic-1.C b/gcc/testsuite/g++.dg/gomp/atomic-1.C deleted file mode 100644 index 3e4bc56..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-1.C +++ /dev/null @@ -1,99 +0,0 @@ -/* { dg-do compile } */ - -int x; -volatile int y; -volatile unsigned char z; - -void f1(void) -{ - #pragma omp atomic - x++; - #pragma omp atomic - x--; - #pragma omp atomic - ++x; - #pragma omp atomic - --x; - #pragma omp atomic - x += 1; - #pragma omp atomic - x -= y; - #pragma omp atomic - x |= 1; - #pragma omp atomic - x &= 1; - #pragma omp atomic - x ^= 1; - #pragma omp atomic - x *= 3; - #pragma omp atomic - x /= 3; - #pragma omp atomic - x /= 3; - #pragma omp atomic - x <<= 3; - #pragma omp atomic - x >>= 3; -} - -void f2(void) -{ - #pragma omp atomic - y++; - #pragma omp atomic - y--; - #pragma omp atomic - ++y; - #pragma omp atomic - --y; - #pragma omp atomic - y += 1; - #pragma omp atomic - y -= x; - #pragma omp atomic - y |= 1; - #pragma omp atomic - y &= 1; - #pragma omp atomic - y ^= 1; - #pragma omp atomic - y *= 3; - #pragma omp atomic - y /= 3; - #pragma omp atomic - y /= 3; - #pragma omp atomic - y <<= 3; - #pragma omp atomic - y >>= 3; -} - -void f3(void) -{ - #pragma omp atomic - z++; - #pragma omp atomic - z--; - #pragma omp atomic - ++z; - #pragma omp atomic - --z; - #pragma omp atomic - z += 1; - #pragma omp atomic - z |= 1; - #pragma omp atomic - z &= 1; - #pragma omp atomic - z ^= 1; - #pragma omp atomic - z *= 3; - #pragma omp atomic - z /= 3; - #pragma omp atomic - z /= 3; - #pragma omp atomic - z <<= 3; - #pragma omp atomic - z >>= 3; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-10.C b/gcc/testsuite/g++.dg/gomp/atomic-10.C deleted file mode 100644 index fe64f0f..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-10.C +++ /dev/null @@ -1,24 +0,0 @@ -// PR middle-end/28046 -// { dg-do compile } -// { dg-options "-fopenmp -fdump-tree-ompexp" } - -int a[3], b; -struct C { int x; int y; } c; - -int bar (void), *baz (void); - -void -foo (void) -{ 
-#pragma omp atomic - a[2] += bar (); -#pragma omp atomic - b += bar (); -#pragma omp atomic - c.y += bar (); -#pragma omp atomic - *baz () += bar (); -} - -// { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 4 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } -// { dg-final { cleanup-tree-dump "ompexp" } } diff --git a/gcc/testsuite/g++.dg/gomp/atomic-11.C b/gcc/testsuite/g++.dg/gomp/atomic-11.C deleted file mode 100644 index 618c4c8..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-11.C +++ /dev/null @@ -1,306 +0,0 @@ -/* PR middle-end/45423 */ -/* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-gimple -g0" } */ -/* atomicvar should never be referenced in between the barrier and - following #pragma omp atomic_load. */ -/* { dg-final { scan-tree-dump-not "barrier\[^#\]*atomicvar" "gimple" } } */ -/* { dg-final { cleanup-tree-dump "gimple" } } */ - -#ifdef __cplusplus -bool atomicvar, c; -#else -_Bool atomicvar, c; -#endif -int i, atomicvar2, c2; - -int -foo (void) -{ - #pragma omp barrier - #pragma omp atomic - atomicvar |= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar |= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar |= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar |= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar |= c; - #pragma omp barrier - #pragma omp atomic - atomicvar ^= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar ^= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar ^= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar ^= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar ^= c; - #pragma omp barrier - #pragma omp atomic - atomicvar &= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar &= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar &= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar &= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar &= c; - #pragma omp barrier - #pragma omp atomic - atomicvar += -1; - #pragma omp barrier - #pragma omp atomic - atomicvar += 0; - #pragma omp barrier - #pragma omp atomic - atomicvar += 1; - #pragma omp barrier - #pragma omp atomic - atomicvar += 2; - #pragma omp barrier - #pragma omp atomic - atomicvar += c; - #pragma omp barrier - #pragma omp atomic - atomicvar -= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar -= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar -= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar -= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar -= c; - #pragma omp barrier - #pragma omp atomic - atomicvar *= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar *= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar *= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar *= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar *= c; - #pragma omp barrier - #pragma omp atomic - atomicvar /= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar /= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar /= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar /= c; - #pragma omp barrier - #pragma omp atomic - atomicvar <<= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar <<= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar <<= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar <<= i; - #pragma omp barrier - #pragma omp atomic - atomicvar >>= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar >>= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar >>= 2; - #pragma 
omp barrier - #pragma omp atomic - atomicvar >>= i; - #pragma omp barrier - #pragma omp atomic - atomicvar++; - #pragma omp barrier - #pragma omp atomic - ++atomicvar; - #pragma omp barrier -#ifndef __cplusplus - #pragma omp atomic - atomicvar--; - #pragma omp barrier - #pragma omp atomic - --atomicvar; - #pragma omp barrier -#endif - return 0; -} - -int -bar (void) -{ - #pragma omp barrier - #pragma omp atomic - atomicvar2 |= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 |= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 |= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 |= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 |= c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 ^= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 ^= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 ^= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 ^= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 ^= c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 &= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 &= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 &= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 &= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 &= c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 += -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 += 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 += 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 += 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 += c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 -= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 -= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 -= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 -= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 -= c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 *= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 *= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 *= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 *= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 *= c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 /= -1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 /= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 /= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 /= c2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 <<= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 <<= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 <<= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 <<= i; - #pragma omp barrier - #pragma omp atomic - atomicvar2 >>= 0; - #pragma omp barrier - #pragma omp atomic - atomicvar2 >>= 1; - #pragma omp barrier - #pragma omp atomic - atomicvar2 >>= 2; - #pragma omp barrier - #pragma omp atomic - atomicvar2 >>= i; - #pragma omp barrier - #pragma omp atomic - atomicvar2++; - #pragma omp barrier - #pragma omp atomic - ++atomicvar2; - #pragma omp barrier - #pragma omp atomic - atomicvar2--; - #pragma omp barrier - #pragma omp atomic - --atomicvar2; - #pragma omp barrier - return 0; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-12.C b/gcc/testsuite/g++.dg/gomp/atomic-12.C deleted file mode 100644 index 6c1f965..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-12.C +++ /dev/null @@ -1,9 +0,0 @@ -/* PR middle-end/45423 */ -/* { dg-do compile } */ -/* { dg-options 
"-fopenmp -fdump-tree-gimple -g0 -O2" } */ -/* atomicvar should never be referenced in between the barrier and - following #pragma omp atomic_load. */ -/* { dg-final { scan-tree-dump-not "barrier\[^#\]*atomicvar" "gimple" } } */ -/* { dg-final { cleanup-tree-dump "gimple" } } */ - -#include "atomic-11.C" diff --git a/gcc/testsuite/g++.dg/gomp/atomic-13.C b/gcc/testsuite/g++.dg/gomp/atomic-13.C deleted file mode 100644 index f8fc9d8..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-13.C +++ /dev/null @@ -1,43 +0,0 @@ -/* PR middle-end/45423 */ -/* { dg-do compile } */ -/* { dg-options "-fopenmp" } */ - -#ifdef __cplusplus -bool *baz (); -#else -_Bool *baz (); -#endif -int *bar (); - -int -foo (void) -{ - #pragma omp barrier - #pragma omp atomic - (*bar ())++; - #pragma omp barrier - #pragma omp atomic - ++(*bar ()); - #pragma omp barrier - #pragma omp atomic - (*bar ())--; - #pragma omp barrier - #pragma omp atomic - --(*bar ()); - #pragma omp barrier - #pragma omp atomic - (*baz ())++; - #pragma omp barrier - #pragma omp atomic - ++(*baz ()); -#ifndef __cplusplus - #pragma omp barrier - #pragma omp atomic - (*baz ())--; - #pragma omp barrier - #pragma omp atomic - --(*baz ()); - #pragma omp barrier -#endif - return 0; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-15.C b/gcc/testsuite/g++.dg/gomp/atomic-15.C deleted file mode 100644 index 95eb8b4..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-15.C +++ /dev/null @@ -1,46 +0,0 @@ -// { dg-do compile } -// { dg-options "-fopenmp" } - -int x = 6; - -int -main () -{ - int v; - #pragma omp atomic - x = x * 7 + 6; // { dg-error "expected" } - #pragma omp atomic - x = x * 7 ^ 6; // { dg-error "expected" } - #pragma omp atomic update - x = x - 8 + 6; // { dg-error "expected" } - #pragma omp atomic - x = x ^ 7 | 2; // { dg-error "expected" } - #pragma omp atomic - x = x / 7 * 2; // { dg-error "expected" } - #pragma omp atomic - x = x / 7 / 2; // { dg-error "expected" } - #pragma omp atomic capture - v = x = x | 6; // { dg-error "invalid operator" } - #pragma omp atomic capture - { v = x; x = x * 7 + 6; } // { dg-error "expected" } - #pragma omp atomic capture - { v = x; x = x * 7 ^ 6; } // { dg-error "expected" } - #pragma omp atomic capture - { v = x; x = x - 8 + 6; } // { dg-error "expected" } - #pragma omp atomic capture - { v = x; x = x ^ 7 | 2; } // { dg-error "expected" } - #pragma omp atomic capture - { v = x; x = x / 7 * 2; } // { dg-error "expected" } - #pragma omp atomic capture - { v = x; x = x / 7 / 2; } // { dg-error "expected" } - #pragma omp atomic capture - { x = x * 7 + 6; v = x; } // { dg-error "expected" } - #pragma omp atomic capture - { x = x * 7 ^ 6; v = x; } // { dg-error "expected" } - #pragma omp atomic capture - { x = x - 8 + 6; v = x; } // { dg-error "expected" } - #pragma omp atomic capture - { x = x ^ 7 | 2; v = x; } // { dg-error "expected" } - (void) v; - return 0; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-2.C b/gcc/testsuite/g++.dg/gomp/atomic-2.C deleted file mode 100644 index 720ec9e..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-2.C +++ /dev/null @@ -1,23 +0,0 @@ -/* { dg-do compile } */ - -float x, y; - -void f1(void) -{ - #pragma omp atomic - x++; - #pragma omp atomic - x--; - #pragma omp atomic - ++x; - #pragma omp atomic - --x; - #pragma omp atomic - x += 1; - #pragma omp atomic - x -= y; - #pragma omp atomic - x *= 3; - #pragma omp atomic - x /= 3; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-3.C b/gcc/testsuite/g++.dg/gomp/atomic-3.C deleted file mode 100644 index 7ea792d..0000000 --- 
a/gcc/testsuite/g++.dg/gomp/atomic-3.C +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ - -int *xyzzy; - -void f1(void) -{ - #pragma omp atomic - xyzzy++; -} - -/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */ -/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/atomic-4.C b/gcc/testsuite/g++.dg/gomp/atomic-4.C deleted file mode 100644 index 7f27370..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-4.C +++ /dev/null @@ -1,24 +0,0 @@ -/* { dg-do compile } */ - -int a[4]; -int *p; -struct S { int x; int y[4]; } s; -int *bar(void); - -void f1(void) -{ - #pragma omp atomic - a[4] += 1; - #pragma omp atomic - *p += 1; - #pragma omp atomic - s.x += 1; - #pragma omp atomic - s.y[*p] += 1; - #pragma omp atomic - s.y[*p] *= 42; - #pragma omp atomic - *bar() += 1; - #pragma omp atomic - *bar() *= 42; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-7.C b/gcc/testsuite/g++.dg/gomp/atomic-7.C deleted file mode 100644 index 612e97f..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-7.C +++ /dev/null @@ -1,23 +0,0 @@ -/* { dg-do compile } */ - -double x, y; - -void f2(void) -{ - #pragma omp atomic - y++; - #pragma omp atomic - y--; - #pragma omp atomic - ++y; - #pragma omp atomic - --y; - #pragma omp atomic - y += 1; - #pragma omp atomic - y -= x; - #pragma omp atomic - y *= 3; - #pragma omp atomic - y /= 3; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-8.C b/gcc/testsuite/g++.dg/gomp/atomic-8.C deleted file mode 100644 index 2f04151..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-8.C +++ /dev/null @@ -1,21 +0,0 @@ -/* { dg-do compile } */ - -long double z; - -void f3(void) -{ - #pragma omp atomic - z++; - #pragma omp atomic - z--; - #pragma omp atomic - ++z; - #pragma omp atomic - --z; - #pragma omp atomic - z += 1; - #pragma omp atomic - z *= 3; - #pragma omp atomic - z /= 3; -} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-9.C b/gcc/testsuite/g++.dg/gomp/atomic-9.C deleted file mode 100644 index 2fafbd4..0000000 --- a/gcc/testsuite/g++.dg/gomp/atomic-9.C +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ - -volatile int *bar(void); - -void f1(void) -{ - #pragma omp atomic - *bar() += 1; -} - -/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */ -/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/gomp.exp b/gcc/testsuite/g++.dg/gomp/gomp.exp index 9f60bc1..b99d302 100644 --- a/gcc/testsuite/g++.dg/gomp/gomp.exp +++ b/gcc/testsuite/g++.dg/gomp/gomp.exp @@ -27,7 +27,7 @@ if ![check_effective_target_fopenmp] { dg-init # Main loop. -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C]] "" "-fopenmp" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C $srcdir/c-c++-common/gomp/*.c]] "" "-fopenmp" # All done. dg-finish diff --git a/gcc/testsuite/g++.dg/simulate-thread/atomics-1.C b/gcc/testsuite/g++.dg/simulate-thread/atomics-1.C new file mode 100644 index 0000000..7e0041e --- /dev/null +++ b/gcc/testsuite/g++.dg/simulate-thread/atomics-1.C @@ -0,0 +1,73 @@ +/* { dg-do link } */ +/* { dg-options "-std=c++0x" } */ +/* { dg-final { simulate-thread } } */ + +/* Test that atomic int and atomic char work properly. 
*/ + +using namespace std; + +#include <atomic> +#include <limits.h> +#include <stdio.h> +#include "simulate-thread.h" + +atomic<int> atomi; +atomic<char> atomc; + +/* No need for parallel threads to do anything */ +void simulate_thread_other_threads() +{ +} + +/* Verify after every instruction is executed, that the atmoic int and + char have one of the 2 legitimate values. */ +int simulate_thread_step_verify() +{ + if (atomi != 0 && atomi != INT_MAX) + { + printf ("FAIL: invalid intermediate result for atomi (%d).\n", + (int)atomi); + return 1; + } + if (atomc != 0 && atomc != CHAR_MAX) + { + printf ("FAIL: invalid intermediate result for atomc (%d).\n", + (int)atomc); + return 1; + } + return 0; +} + + +/* Verify that both atmoics have the corerct value. */ +int simulate_thread_final_verify() +{ + if (atomi != INT_MAX) + { + printf ("FAIL: invalid final result for atomi (%d).\n", + (int)atomi); + return 1; + } + if (atomc != CHAR_MAX) + { + printf ("FAIL: invalid final result for atomc (%d).\n", + (int)atomc); + return 1; + } + return 0; +} + +/* Test a store to an atomic int and an atomic char. */ +__attribute__((noinline)) +void simulate_thread_main() +{ + atomi = INT_MAX; + atomc = CHAR_MAX; +} + +int main () +{ + simulate_thread_main(); + simulate_thread_done(); + return 0; +} diff --git a/gcc/testsuite/g++.dg/simulate-thread/atomics-2.C b/gcc/testsuite/g++.dg/simulate-thread/atomics-2.C new file mode 100644 index 0000000..be3232d --- /dev/null +++ b/gcc/testsuite/g++.dg/simulate-thread/atomics-2.C @@ -0,0 +1,58 @@ +/* { dg-do link } */ +/* { dg-options "-std=c++0x" } */ +/* { dg-final { simulate-thread } } */ + +using namespace std; + +#include <atomic> +#include <limits.h> +#include <stdio.h> +#include "simulate-thread.h" + +atomic_int atomi; + +/* Non-atomic. Use a type wide enough to possibly coerce GCC into + moving things around. */ +long double j; + + +/* Test that an atomic store synchronizes with an atomic load. + + In this case, test that the store to <j> happens-before the atomic + store to <atomi>. Make sure the compiler does not reorder the + stores. */ +__attribute__((noinline)) +void simulate_thread_main() +{ + j = 13.0; + atomi.store(1); +} + +int main () +{ + simulate_thread_main(); + simulate_thread_done(); + return 0; +} + +void simulate_thread_other_threads() +{ +} + +/* Verify that side-effects before an atomic store are correctly + synchronized with the an atomic load to the same location. */ +int simulate_thread_step_verify() +{ + if (atomi.load() == 1 && j != 13.0) + { + printf ("FAIL: invalid synchronization for atomic load/store.\n"); + return 1; + } + return 0; +} + + +int simulate_thread_final_verify() +{ + return simulate_thread_step_verify(); +} diff --git a/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C b/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C new file mode 100644 index 0000000..077514a --- /dev/null +++ b/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C @@ -0,0 +1,77 @@ +/* { dg-do link } */ +/* { dg-options "--param allow-load-data-races=0 --param allow-store-data-races=0" } */ +/* { dg-final { simulate-thread } } */ + +/* Test that setting <var.a> does not touch either <var.b> or <var.c>. + In the C++ memory model, non contiguous bitfields ("a" and "c" + here) should be considered as distinct memory locations, so we + can't use bit twiddling to set either one. 
*/ + +#include <stdio.h> +#include "simulate-thread.h" + +#define CONSTA 12 + +static int global; +struct S +{ + unsigned int a : 4; + unsigned char b; + unsigned int c : 6; +} var; + +__attribute__((noinline)) +void set_a() +{ + var.a = CONSTA; +} + +void simulate_thread_other_threads() +{ + ++global; + var.b = global; + var.c = global; +} + +int simulate_thread_step_verify() +{ + int ret = 0; + if (var.b != global) + { + printf ("FAIL: Unexpected value: var.b is %d, should be %d\n", + var.b, global); + ret = 1; + } + if (var.c != global) + { + printf ("FAIL: Unexpected value: var.c is %d, should be %d\n", + var.c, global); + ret = 1; + } + return ret; +} + +int simulate_thread_final_verify() +{ + int ret = simulate_thread_step_verify(); + if (var.a != CONSTA) + { + printf ("FAIL: Unexpected value: var.a is %d, should be %d\n", + var.a, CONSTA); + ret = 1; + } + return ret; +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + set_a(); +} + +int main() +{ + simulate_thread_main(); + simulate_thread_done(); + return 0; +} diff --git a/gcc/testsuite/g++.dg/simulate-thread/bitfields.C b/gcc/testsuite/g++.dg/simulate-thread/bitfields.C new file mode 100644 index 0000000..3acf21f --- /dev/null +++ b/gcc/testsuite/g++.dg/simulate-thread/bitfields.C @@ -0,0 +1,80 @@ +/* { dg-do link } */ +/* { dg-options "--param allow-load-data-races=0 --param allow-store-data-races=0" } */ +/* { dg-final { simulate-thread } } */ + +/* Test that setting <var.a> does not touch either <var.b> or <var.c>. + In the C++ memory model, non contiguous bitfields ("a" and "c" + here) should be considered as distinct memory locations, so we + can't use bit twiddling to set either one. */ + +#include <stdio.h> +#include "simulate-thread.h" + +#define CONSTA 12 + +static int global; +struct S +{ + /* On x86-64, the volatile causes us to access <a> with a 32-bit + access, and thus trigger this test. */ + volatile unsigned int a : 4; + + unsigned char b; + unsigned int c : 6; +} var; + +__attribute__((noinline)) +void set_a() +{ + var.a = CONSTA; +} + +void simulate_thread_other_threads() +{ + ++global; + var.b = global; + var.c = global; +} + +int simulate_thread_step_verify() +{ + int ret = 0; + if (var.b != global) + { + printf ("FAIL: Unexpected value: var.b is %d, should be %d\n", + var.b, global); + ret = 1; + } + if (var.c != global) + { + printf ("FAIL: Unexpected value: var.c is %d, should be %d\n", + var.c, global); + ret = 1; + } + return ret; +} + +int simulate_thread_final_verify() +{ + int ret = simulate_thread_step_verify(); + if (var.a != CONSTA) + { + printf ("FAIL: Unexpected value: var.a is %d, should be %d\n", + var.a, CONSTA); + ret = 1; + } + return ret; +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + set_a(); +} + +int main () +{ + simulate_thread_main(); + simulate_thread_done(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c b/gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c new file mode 100644 index 0000000..2ac54e8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c @@ -0,0 +1,85 @@ +/* Test __atomic routines for existence and proper execution on 1 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + +/* Test the execution of the __atomic_compare_exchange_n builtin for a char. 
*/ + +extern void abort(void); + +char v = 0; +char expected = 0; +char max = ~0; +char desired = ~0; +char zero = 0; + +#define STRONG 0 +#define WEAK 1 + +main () +{ + + if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + /* Now test the generic version. */ + + v = 0; + + if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c b/gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c new file mode 100644 index 0000000..73b2597 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c @@ -0,0 +1,85 @@ +/* Test __atomic routines for existence and proper execution on 2 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + +/* Test the execution of the __atomic_compare_exchange_n builtin for a short. */ + +extern void abort(void); + +short v = 0; +short expected = 0; +short max = ~0; +short desired = ~0; +short zero = 0; + +#define STRONG 0 +#define WEAK 1 + +main () +{ + + if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + /* Now test the generic version. 
*/ + + v = 0; + + if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c b/gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c new file mode 100644 index 0000000..2609728 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c @@ -0,0 +1,85 @@ +/* Test __atomic routines for existence and proper execution on 4 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ + +/* Test the execution of the __atomic_compare_exchange_n builtin for an int. */ + +extern void abort(void); + +int v = 0; +int expected = 0; +int max = ~0; +int desired = ~0; +int zero = 0; + +#define STRONG 0 +#define WEAK 1 + +main () +{ + + if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + /* Now test the generic version. */ + + v = 0; + + if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-compare-exchange-4.c b/gcc/testsuite/gcc.dg/atomic-compare-exchange-4.c new file mode 100644 index 0000000..d89e72f --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-compare-exchange-4.c @@ -0,0 +1,86 @@ +/* Test __atomic routines for existence and proper execution on 8 byte + values with each valid memory model. 
*/ +/* { dg-do run } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ + +/* Test the execution of __atomic_compare_exchange_n builtin for a long_long. */ + +extern void abort(void); + +long long v = 0; +long long expected = 0; +long long max = ~0; +long long desired = ~0; +long long zero = 0; + +#define STRONG 0 +#define WEAK 1 + +main () +{ + + if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + /* Now test the generic version. */ + + v = 0; + + if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-compare-exchange-5.c b/gcc/testsuite/gcc.dg/atomic-compare-exchange-5.c new file mode 100644 index 0000000..e716dcb --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-compare-exchange-5.c @@ -0,0 +1,86 @@ +/* Test __atomic routines for existence and proper execution on 16 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_128 } */ +/* { dg-options "-mcx16" { target { x86_64-*-* } } } */ + +/* Test the execution of __atomic_compare_exchange_n builtin for an int_128. 
*/ + +extern void abort(void); + +__int128_t v = 0; +__int128_t expected = 0; +__int128_t max = ~0; +__int128_t desired = ~0; +__int128_t zero = 0; + +#define STRONG 0 +#define WEAK 1 + +main () +{ + + if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + /* Now test the generic version. */ + + v = 0; + + if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort (); + if (expected != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + abort (); + if (expected != max) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) + abort (); + if (expected != max) + abort (); + if (v != 0) + abort (); + + if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) + abort (); + if (expected != 0) + abort (); + + if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + abort (); + if (expected != 0) + abort (); + if (v != max) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-exchange-1.c b/gcc/testsuite/gcc.dg/atomic-exchange-1.c new file mode 100644 index 0000000..fb78cdb --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-exchange-1.c @@ -0,0 +1,62 @@ +/* Test __atomic routines for existence and proper execution on 1 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + +/* Test the execution of the __atomic_exchange_n builtin for a char. */ + +extern void abort(void); + +char v, count, ret; + +main () +{ + v = 0; + count = 0; + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++) + abort (); + + /* Now test the generic version. 
*/ + + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); + if (ret != count - 1 || v != count) + abort (); + count++; + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-exchange-2.c b/gcc/testsuite/gcc.dg/atomic-exchange-2.c new file mode 100644 index 0000000..153771a --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-exchange-2.c @@ -0,0 +1,62 @@ +/* Test __atomic routines for existence and proper execution on 2 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + +/* Test the execution of the __atomic_X builtin for a short. */ + +extern void abort(void); + +short v, count, ret; + +main () +{ + v = 0; + count = 0; + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++) + abort (); + + /* Now test the generic version. */ + + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); + if (ret != count - 1 || v != count) + abort (); + count++; + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-exchange-3.c b/gcc/testsuite/gcc.dg/atomic-exchange-3.c new file mode 100644 index 0000000..fbf8f6b --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-exchange-3.c @@ -0,0 +1,62 @@ +/* Test __atomic routines for existence and proper execution on 4 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ + +/* Test the execution of the __atomic_X builtin for an int. */ + +extern void abort(void); + +int v, count, ret; + +main () +{ + v = 0; + count = 0; + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++) + abort (); + + /* Now test the generic version. 
*/ + + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); + if (ret != count - 1 || v != count) + abort (); + count++; + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-exchange-4.c b/gcc/testsuite/gcc.dg/atomic-exchange-4.c new file mode 100644 index 0000000..f0530fc --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-exchange-4.c @@ -0,0 +1,63 @@ +/* Test __atomic routines for existence and proper execution on 8 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ + +/* Test the execution of the __atomic_X builtin for a long_long. */ + +extern void abort(void); + +long long v, count, ret; + +main () +{ + v = 0; + count = 0; + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++) + abort (); + + /* Now test the generic version. */ + + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); + if (ret != count - 1 || v != count) + abort (); + count++; + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-exchange-5.c b/gcc/testsuite/gcc.dg/atomic-exchange-5.c new file mode 100644 index 0000000..13fd6d1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-exchange-5.c @@ -0,0 +1,63 @@ +/* Test __atomic routines for existence and proper execution on 16 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_128 } */ +/* { dg-options "-mcx16" { target { x86_64-*-* } } } */ + +/* Test the execution of the __atomic_X builtin for a 16 byte value. */ + +extern void abort(void); + +__int128_t v, count, ret; + +main () +{ + v = 0; + count = 0; + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++) + abort (); + + if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++) + abort (); + + /* Now test the generic version. 
*/ + + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); + if (ret != count - 1 || v != count) + abort (); + count++; + + __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); + if (ret != count - 1 || v != count) + abort (); + count++; + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-fence.c b/gcc/testsuite/gcc.dg/atomic-fence.c new file mode 100644 index 0000000..1f6d187 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-fence.c @@ -0,0 +1,27 @@ +/* Test __atomic routines for existence and execution with each valid + memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + + +/* Test that __atomic_{thread,signal}_fence builtins execute. */ + +main () +{ + __atomic_thread_fence (__ATOMIC_RELAXED); + __atomic_thread_fence (__ATOMIC_CONSUME); + __atomic_thread_fence (__ATOMIC_ACQUIRE); + __atomic_thread_fence (__ATOMIC_RELEASE); + __atomic_thread_fence (__ATOMIC_ACQ_REL); + __atomic_thread_fence (__ATOMIC_SEQ_CST); + + __atomic_signal_fence (__ATOMIC_RELAXED); + __atomic_signal_fence (__ATOMIC_CONSUME); + __atomic_signal_fence (__ATOMIC_ACQUIRE); + __atomic_signal_fence (__ATOMIC_RELEASE); + __atomic_signal_fence (__ATOMIC_ACQ_REL); + __atomic_signal_fence (__ATOMIC_SEQ_CST); + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-generic-aux.c b/gcc/testsuite/gcc.dg/atomic-generic-aux.c new file mode 100644 index 0000000..a6b552a --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-generic-aux.c @@ -0,0 +1,45 @@ +/* Supply a set of generic atomic functions to test the compiler make the + calls properly. */ +/* { dg-do compile } */ +/* { dg-options "-w" } */ + +/* Test that the generic builtins make calls as expected. */ + +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +void +__atomic_exchange (size_t size, void *obj, void *val, void *ret, int model) +{ + /* Copy old value into *ret. */ + memcpy (ret, obj, size); + /* Copy val into object. */ + memcpy (obj, val, size); +} + + +bool +__atomic_compare_exchange (size_t size, void *obj, void *expected, + void *desired, int model1, int model2) +{ + if (!memcmp (obj, expected, size)) + { + memcpy (obj, desired, size); + return true; + } + memcpy (expected, obj, size); + return false; +} + + +void __atomic_load (size_t size, void *obj, void *ret, int model) +{ + memcpy (ret, obj, size); +} + + +void __atomic_store (size_t size, void *obj, void *val, int model) +{ + memcpy (obj, val, size); +} diff --git a/gcc/testsuite/gcc.dg/atomic-generic.c b/gcc/testsuite/gcc.dg/atomic-generic.c new file mode 100644 index 0000000..8a5528c --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-generic.c @@ -0,0 +1,56 @@ +/* Test generic __atomic routines for proper function calling. + memory model. */ +/* { dg-options "-w" } */ +/* { dg-do run } */ +/* { dg-additional-sources "atomic-generic-aux.c" } */ + +/* Test that the generioc atomic builtins execute as expected.. + sync-mem-generic-aux.c supplies a functional external entry point for + the 4 generic functions. 
*/ + +#include <stdlib.h> +#include <stdbool.h> + +extern void abort(); + +typedef struct test { + int array[10]; +} test_struct; + +test_struct zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +test_struct ones = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; +test_struct a,b; + +int size = sizeof (test_struct); +/* Test for consistency on sizes 1, 2, 4, 8, 16 and 32. */ +main () +{ + test_struct c; + + __atomic_store (&a, &zero, __ATOMIC_RELAXED); + if (memcmp (&a, &zero, size)) + abort (); + + __atomic_exchange (&a, &ones, &c, __ATOMIC_SEQ_CST); + if (memcmp (&c, &zero, size)) + abort (); + if (memcmp (&a, &ones, size)) + abort (); + + __atomic_load (&a, &b, __ATOMIC_RELAXED); + if (memcmp (&b, &ones, size)) + abort (); + + if (!__atomic_compare_exchange (&a, &b, &zero, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort(); + if (memcmp (&a, &zero, size)) + abort (); + + if (__atomic_compare_exchange (&a, &b, &ones, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + abort(); + if (memcmp (&b, &zero, size)) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-invalid.c b/gcc/testsuite/gcc.dg/atomic-invalid.c new file mode 100644 index 0000000..2b73c91 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-invalid.c @@ -0,0 +1,29 @@ +/* Test __atomic routines for invalid memory model errors. This only needs + to be tested on a single size. */ +/* { dg-do compile } */ +/* { dg-require-effective-target sync_int_long } */ + +#include <stddef.h> + +int i, e, b; +size_t s; + +main () +{ + __atomic_compare_exchange_n (&i, &e, 1, 0, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST); /* { dg-error "failure memory model cannot be stronger" } */ + __atomic_compare_exchange_n (&i, &e, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELEASE); /* { dg-error "invalid failure memory" } */ + __atomic_compare_exchange_n (&i, &e, 1, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL); /* { dg-error "invalid failure memory" } */ + + __atomic_exchange_n (&i, 1, __ATOMIC_CONSUME); /* { dg-error "invalid memory model" } */ + + __atomic_load_n (&i, __ATOMIC_RELEASE); /* { dg-error "invalid memory model" } */ + __atomic_load_n (&i, __ATOMIC_ACQ_REL); /* { dg-error "invalid memory model" } */ + + __atomic_store_n (&i, 1, __ATOMIC_ACQUIRE); /* { dg-error "invalid memory model" } */ + __atomic_store_n (&i, 1, __ATOMIC_CONSUME); /* { dg-error "invalid memory model" } */ + __atomic_store_n (&i, 1, __ATOMIC_ACQ_REL); /* { dg-error "invalid memory model" } */ + + i = __atomic_always_lock_free (s, NULL); /* { dg-error "non-constant argument" } */ + + __atomic_load_n (&i, 44); /* { dg-warning "invalid memory model" } */ +} diff --git a/gcc/testsuite/gcc.dg/atomic-load-1.c b/gcc/testsuite/gcc.dg/atomic-load-1.c new file mode 100644 index 0000000..928f9b0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-load-1.c @@ -0,0 +1,66 @@ +/* Test __atomic routines for existence and proper execution on 1 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + + +/* Test the execution of the __atomic_load_n builtin for a char. */ + +extern void abort(void); + +char v, count; + +main () +{ + v = 0; + count = 0; + + if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++) + abort(); + else + v++; + + /* Now test the generic variants. 
*/ + + __atomic_load (&v, &count, __ATOMIC_RELAXED); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_ACQUIRE); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_CONSUME); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_SEQ_CST); + if (count != v) + abort(); + else + v++; + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-load-2.c b/gcc/testsuite/gcc.dg/atomic-load-2.c new file mode 100644 index 0000000..3d1df1c --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-load-2.c @@ -0,0 +1,68 @@ +/* Test __atomic routines for existence and proper execution on 2 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + + +/* Test the execution of the __atomic_load_n builtin for a short. */ + +extern void abort(void); + +short v, count; + + +main () +{ + v = 0; + count = 0; + + if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++) + abort(); + else + v++; + + /* Now test the generic variants. */ + + __atomic_load (&v, &count, __ATOMIC_RELAXED); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_ACQUIRE); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_CONSUME); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_SEQ_CST); + if (count != v) + abort(); + else + v++; + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-load-3.c b/gcc/testsuite/gcc.dg/atomic-load-3.c new file mode 100644 index 0000000..ec238be --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-load-3.c @@ -0,0 +1,65 @@ +/* Test __atomic routines for existence and proper execution on 4 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ + +extern void abort(void); + +int v, count; + + +main () +{ + v = 0; + count = 0; + + if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++) + abort(); + else + v++; + + /* Now test the generic variants. */ + + __atomic_load (&v, &count, __ATOMIC_RELAXED); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_ACQUIRE); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_CONSUME); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_SEQ_CST); + if (count != v) + abort(); + else + v++; + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-load-4.c b/gcc/testsuite/gcc.dg/atomic-load-4.c new file mode 100644 index 0000000..5cb7659 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-load-4.c @@ -0,0 +1,65 @@ +/* Test __atomic routines for existence and proper execution on 8 byte + values with each valid memory model. 
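+   As with the narrower variants, only the RELAXED, CONSUME, ACQUIRE and
+   SEQ_CST models are exercised here; RELEASE and ACQ_REL are not valid
+   for loads (atomic-invalid.c checks that they are rejected).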
*/ +/* { dg-do run } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ + +extern void abort(void); + +long long v, count; + +main () +{ + v = 0; + count = 0; + + if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++) + abort(); + else + v++; + + /* Now test the generic variants. */ + + __atomic_load (&v, &count, __ATOMIC_RELAXED); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_ACQUIRE); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_CONSUME); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_SEQ_CST); + if (count != v) + abort(); + else + v++; + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-load-5.c b/gcc/testsuite/gcc.dg/atomic-load-5.c new file mode 100644 index 0000000..2991e4d --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-load-5.c @@ -0,0 +1,65 @@ +/* Test __atomic routines for existence and proper execution on 16 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_128 } */ +/* { dg-options "-mcx16" { target { x86_64-*-* } } } */ + +extern void abort(void); + +__int128_t v, count; + +main () +{ + v = 0; + count = 0; + + if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++) + abort(); + else + v++; + + if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++) + abort(); + else + v++; + + /* Now test the generic variants. */ + + __atomic_load (&v, &count, __ATOMIC_RELAXED); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_ACQUIRE); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_CONSUME); + if (count != v) + abort(); + else + v++; + + __atomic_load (&v, &count, __ATOMIC_SEQ_CST); + if (count != v) + abort(); + else + v++; + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-lockfree-aux.c b/gcc/testsuite/gcc.dg/atomic-lockfree-aux.c new file mode 100644 index 0000000..0ea872c --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-lockfree-aux.c @@ -0,0 +1,17 @@ +/* Test supply a __atomic_is_lock_free routine for lock-free tests. */ +/* Just compile it on its own. */ +/* { dg-do compile } */ +/* { dg-options "-w" } */ + +/* Test that __atomic_{is,always}_lock_free builtins execute. */ + +#include <stdlib.h> + +/* Supply a builtin external function which returns a non-standard value so + it can be detected that it was called. */ +int +__atomic_is_lock_free (size_t s, void *p) +{ + return 2; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-lockfree.c b/gcc/testsuite/gcc.dg/atomic-lockfree.c new file mode 100644 index 0000000..2254282 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-lockfree.c @@ -0,0 +1,120 @@ +/* Test __atomic routines for existence and execution with each valid + memory model. */ +/* { dg-options "-w" } */ +/* { dg-do run } */ +/* { dg-additional-sources "atomic-lockfree-aux.c" } */ + +/* Test that __atomic_{is,always}_lock_free builtins execute. + sync-mem-lockfree-aux.c supplies and external entry point for + __atomic_is_lock_free which always returns a 2. 
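+   (__atomic_always_lock_free requires a compile-time constant size and
+   folds to a constant 0 or 1 at compile time, while __atomic_is_lock_free
+   may fall back to a call to the library routine, which is what this
+   test relies on.)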
We can detect the + external routine was called if 2 is returned since that is not a valid + result normally. */ + +#include <stdlib.h> + +extern void abort(); + +int r1, r2; + +/* Test for consistency on sizes 1, 2, 4, 8, 16 and 32. */ +main () +{ + + r1 = __atomic_always_lock_free (sizeof(char), 0); + r2 = __atomic_is_lock_free (sizeof(char), 0); + /* If always lock free, then is_lock_free must also be true. */ + if (r1) + { + if (r2 != 1) + abort (); + } + else + { + /* If it is not lock free, then the external routine must be called. */ + if (r2 != 2) + abort (); + } + + r1 = __atomic_always_lock_free (2, 0); + r2 = __atomic_is_lock_free (2, 0); + /* If always lock free, then is_lock_free must also be true. */ + if (r1) + { + if (r2 != 1) + abort (); + } + else + { + /* If it is not lock free, then the external routine must be called. */ + if (r2 != 2) + abort (); + } + + + r1 = __atomic_always_lock_free (4, 0); + r2 = __atomic_is_lock_free (4, 0); /* Try passing in a variable. */ + /* If always lock free, then is_lock_free must also be true. */ + if (r1) + { + if (r2 != 1) + abort (); + } + else + { + /* If it is not lock free, then the external routine must be called. */ + if (r2 != 2) + abort (); + } + + + r1 = __atomic_always_lock_free (8, 0); + r2 = __atomic_is_lock_free (8, 0); + /* If always lock free, then is_lock_free must also be true. */ + if (r1) + { + if (r2 != 1) + abort (); + } + else + { + /* If it is not lock free, then the external routine must be called. */ + if (r2 != 2) + abort (); + } + + + r1 = __atomic_always_lock_free (16, 0); + r2 = __atomic_is_lock_free (16, 0); + /* If always lock free, then is_lock_free must also be true. */ + if (r1) + { + if (r2 != 1) + abort (); + } + else + { + /* If it is not lock free, then the external routine must be called. */ + if (r2 != 2) + abort (); + } + + + r1 = __atomic_always_lock_free (32, 0); + r2 = __atomic_is_lock_free (32, 0); + /* If always lock free, then is_lock_free must also be true. */ + if (r1) + { + if (r2 != 1) + abort (); + } + else + { + /* If it is not lock free, then the external routine must be called. */ + if (r2 != 2) + abort (); + } + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-noinline-aux.c b/gcc/testsuite/gcc.dg/atomic-noinline-aux.c new file mode 100644 index 0000000..b92fcfc --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-noinline-aux.c @@ -0,0 +1,51 @@ +/* Supply a set of generic atomic functions to test the compiler make the + calls properly. */ +/* { dg-do compile } */ +/* { dg-options "-w" } */ + +/* Test that the generic builtins make calls as expected. This file provides + the exact entry points the test file will require. All these routines + simply set the first parameter to 1, and the caller will test for that. 
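+   For example, under -fno-inline-atomics a call to
+   __atomic_load_n (&bs, __ATOMIC_SEQ_CST) on a short is expected to be
+   emitted as a call to __atomic_load_2, so the stub version below can be
+   detected when it stores 1 through its first argument.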
*/ + +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + + +char +__atomic_exchange_1 (char *p, char t, int i) +{ + *p = 1; +} + +short +__atomic_load_2 (short *p, int i) +{ + *p = 1; +} + +void +__atomic_store_1 (char *p, char v, int i) +{ + *p = 1; +} + +int __atomic_compare_exchange_2 (short *p, short *a, short b, int x, int y, int z) +{ + *p = 1; +} + +char __atomic_fetch_add_1 (char *p, char v, int i) +{ + *p = 1; +} + +short __atomic_fetch_add_2 (short *p, short v, short i) +{ + *p = 1; +} + +int __atomic_is_lock_free (int i, void *p) +{ + return 10; +} diff --git a/gcc/testsuite/gcc.dg/atomic-noinline.c b/gcc/testsuite/gcc.dg/atomic-noinline.c new file mode 100644 index 0000000..06a93e0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-noinline.c @@ -0,0 +1,56 @@ +/* Test generic __atomic routines for proper function calling. + memory model. */ +/* { dg-options "-w -fno-inline-atomics" } */ +/* { dg-do run } */ +/* { dg-additional-sources "atomic-noinline-aux.c" } */ + +/* Test that -fno-inline-atomics works as expected. + atomic-generic-aux provide the expected routines which simply set the + value of the first parameter to */ + +#include <stdlib.h> +#include <stdbool.h> + +extern void abort(); + +short as,bs,cs; +char ac,bc,cc; + +main () +{ + + ac = __atomic_exchange_n (&bc, cc, __ATOMIC_RELAXED); + if (bc != 1) + abort (); + + as = __atomic_load_n (&bs, __ATOMIC_SEQ_CST); + if (bs != 1) + abort (); + + __atomic_store_n (&ac, bc, __ATOMIC_RELAXED); + if (ac != 1) + abort (); + + __atomic_compare_exchange_n (&as, &bs, cs, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + if (as != 1) + abort (); + + ac = __atomic_fetch_add (&cc, 15, __ATOMIC_SEQ_CST); + if (cc != 1) + abort (); + + /* This should be translated to __atomic_fetch_add for the library */ + as = __atomic_add_fetch (&cs, 10, __ATOMIC_RELAXED); + + if (cs != 1) + abort (); + + /* The fake external function should return 10. */ + if (__atomic_is_lock_free (4, 0) != 10) + abort (); + + return 0; +} + + + diff --git a/gcc/testsuite/gcc.dg/atomic-op-1.c b/gcc/testsuite/gcc.dg/atomic-op-1.c new file mode 100644 index 0000000..bc1716f --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-op-1.c @@ -0,0 +1,554 @@ +/* Test __atomic routines for existence and proper execution on 1 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + +/* Test the execution of the __atomic_*OP builtin routines for a char. */ + +extern void abort(void); + +char v, count, res; +const char init = ~0; + +/* The fetch_op routines return the original value before the operation. 
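+   For example, starting from v == 0,
+   __atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) leaves v == 1 but returns
+   the old value 0.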
*/ + +void +test_fetch_add () +{ + v = 0; + count = 1; + + if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5) + abort (); +} + + +void +test_fetch_sub() +{ + v = res = 20; + count = 0; + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--) + abort (); +} + +void +test_fetch_and () +{ + v = init; + + if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_fetch_nand () +{ + v = init; + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 ) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_xor () +{ + v = init; + count = 0; + + if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_or () +{ + v = 0; + count = 1; + + if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31) + abort (); +} + +/* The OP_fetch routines return the new value after the operation. 
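+   For example, starting from v == 0,
+   __atomic_add_fetch (&v, 1, __ATOMIC_SEQ_CST) leaves v == 1 and also
+   returns the new value 1.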
*/ + +void +test_add_fetch () +{ + v = 0; + count = 1; + + if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6) + abort (); +} + + +void +test_sub_fetch () +{ + v = res = 20; + count = 0; + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res) + abort (); +} + +void +test_and_fetch () +{ + v = init; + + if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0) + abort (); + + v = init; + if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_nand_fetch () +{ + v = init; + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + + + +void +test_xor_fetch () +{ + v = init; + count = 0; + + if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_or_fetch () +{ + v = 0; + count = 1; + + if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63) + abort (); +} + + +/* Test the OP routines with a result which isn't used. Use both variations + within each function. 
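+   When the result is ignored, the compiler is free to use a simpler
+   read-modify-write sequence (for example, a plain lock add on x86 rather
+   than an exchange-add), so both the fetch_OP and OP_fetch spellings are
+   exercised with only the memory side effect checked.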
*/ + +void +test_add () +{ + v = 0; + count = 1; + + __atomic_add_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_CONSUME); + if (v != 2) + abort (); + + __atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE); + if (v != 3) + abort (); + + __atomic_fetch_add (&v, 1, __ATOMIC_RELEASE); + if (v != 4) + abort (); + + __atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 5) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST); + if (v != 6) + abort (); +} + + +void +test_sub() +{ + v = res = 20; + count = 0; + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != --res) + abort (); +} + +void +test_and () +{ + v = init; + + __atomic_and_fetch (&v, 0, __ATOMIC_RELAXED); + if (v != 0) + abort (); + + v = init; + __atomic_fetch_and (&v, init, __ATOMIC_CONSUME); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, init, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_nand () +{ + v = init; + + __atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, init, __ATOMIC_RELEASE); + if (v != 0) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST); + if (v != init) + abort (); +} + + + +void +test_xor () +{ + v = init; + count = 0; + + __atomic_xor_fetch (&v, count, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_or () +{ + v = 0; + count = 1; + + __atomic_or_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_CONSUME); + if (v != 3) + abort (); + + count *= 2; + __atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE); + if (v != 7) + abort (); + + count *= 2; + __atomic_fetch_or (&v, 8, __ATOMIC_RELEASE); + if (v != 15) + abort (); + + count *= 2; + __atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 31) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST); + if (v != 63) + abort (); +} + +main () +{ + test_fetch_add (); + test_fetch_sub (); + test_fetch_and (); + test_fetch_nand (); + test_fetch_xor (); + test_fetch_or (); + + test_add_fetch (); + test_sub_fetch (); + test_and_fetch (); + test_nand_fetch (); + test_xor_fetch (); + test_or_fetch (); + + 
test_add (); + test_sub (); + test_and (); + test_nand (); + test_xor (); + test_or (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-op-2.c b/gcc/testsuite/gcc.dg/atomic-op-2.c new file mode 100644 index 0000000..8755340 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-op-2.c @@ -0,0 +1,555 @@ +/* Test __atomic routines for existence and proper execution on 2 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_char_short } */ + + +/* Test the execution of the __atomic_*OP builtin routines for a short. */ + +extern void abort(void); + +short v, count, res; +const short init = ~0; + +/* The fetch_op routines return the original value before the operation. */ + +void +test_fetch_add () +{ + v = 0; + count = 1; + + if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5) + abort (); +} + + +void +test_fetch_sub() +{ + v = res = 20; + count = 0; + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--) + abort (); +} + +void +test_fetch_and () +{ + v = init; + + if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_fetch_nand () +{ + v = init; + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 ) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_xor () +{ + v = init; + count = 0; + + if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_or () +{ + v = 0; + count = 1; + + if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 
3) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31) + abort (); +} + +/* The OP_fetch routines return the new value after the operation. */ + +void +test_add_fetch () +{ + v = 0; + count = 1; + + if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6) + abort (); +} + + +void +test_sub_fetch () +{ + v = res = 20; + count = 0; + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res) + abort (); +} + +void +test_and_fetch () +{ + v = init; + + if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0) + abort (); + + v = init; + if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_nand_fetch () +{ + v = init; + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + + + +void +test_xor_fetch () +{ + v = init; + count = 0; + + if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_or_fetch () +{ + v = 0; + count = 1; + + if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63) + abort (); +} + + +/* Test the OP routines with a result which isn't used. 
Use both variations + within each function. */ + +void +test_add () +{ + v = 0; + count = 1; + + __atomic_add_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_CONSUME); + if (v != 2) + abort (); + + __atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE); + if (v != 3) + abort (); + + __atomic_fetch_add (&v, 1, __ATOMIC_RELEASE); + if (v != 4) + abort (); + + __atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 5) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST); + if (v != 6) + abort (); +} + + +void +test_sub() +{ + v = res = 20; + count = 0; + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != --res) + abort (); +} + +void +test_and () +{ + v = init; + + __atomic_and_fetch (&v, 0, __ATOMIC_RELAXED); + if (v != 0) + abort (); + + v = init; + __atomic_fetch_and (&v, init, __ATOMIC_CONSUME); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, init, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_nand () +{ + v = init; + + __atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, init, __ATOMIC_RELEASE); + if (v != 0) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST); + if (v != init) + abort (); +} + + + +void +test_xor () +{ + v = init; + count = 0; + + __atomic_xor_fetch (&v, count, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_or () +{ + v = 0; + count = 1; + + __atomic_or_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_CONSUME); + if (v != 3) + abort (); + + count *= 2; + __atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE); + if (v != 7) + abort (); + + count *= 2; + __atomic_fetch_or (&v, 8, __ATOMIC_RELEASE); + if (v != 15) + abort (); + + count *= 2; + __atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 31) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST); + if (v != 63) + abort (); +} + +main () +{ + test_fetch_add (); + test_fetch_sub (); + test_fetch_and (); + test_fetch_nand (); + test_fetch_xor (); + test_fetch_or (); + + test_add_fetch (); + test_sub_fetch (); + test_and_fetch (); + test_nand_fetch (); 
+ test_xor_fetch (); + test_or_fetch (); + + test_add (); + test_sub (); + test_and (); + test_nand (); + test_xor (); + test_or (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-op-3.c b/gcc/testsuite/gcc.dg/atomic-op-3.c new file mode 100644 index 0000000..69db489 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-op-3.c @@ -0,0 +1,554 @@ +/* Test __atomic routines for existence and proper execution on 4 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ + +/* Test the execution of the __atomic_*OP builtin routines for an int. */ + +extern void abort(void); + +int v, count, res; +const int init = ~0; + +/* The fetch_op routines return the original value before the operation. */ + +void +test_fetch_add () +{ + v = 0; + count = 1; + + if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5) + abort (); +} + + +void +test_fetch_sub() +{ + v = res = 20; + count = 0; + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--) + abort (); +} + +void +test_fetch_and () +{ + v = init; + + if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_fetch_nand () +{ + v = init; + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 ) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_xor () +{ + v = init; + count = 0; + + if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_or () +{ + v = 0; + count = 1; + + if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1) + abort (); + + count *= 2; + if 
(__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31) + abort (); +} + +/* The OP_fetch routines return the new value after the operation. */ + +void +test_add_fetch () +{ + v = 0; + count = 1; + + if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6) + abort (); +} + + +void +test_sub_fetch () +{ + v = res = 20; + count = 0; + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res) + abort (); +} + +void +test_and_fetch () +{ + v = init; + + if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0) + abort (); + + v = init; + if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_nand_fetch () +{ + v = init; + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + + + +void +test_xor_fetch () +{ + v = init; + count = 0; + + if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_or_fetch () +{ + v = 0; + count = 1; + + if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63) + abort (); +} + + +/* Test 
the OP routines with a result which isn't used. Use both variations + within each function. */ + +void +test_add () +{ + v = 0; + count = 1; + + __atomic_add_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_CONSUME); + if (v != 2) + abort (); + + __atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE); + if (v != 3) + abort (); + + __atomic_fetch_add (&v, 1, __ATOMIC_RELEASE); + if (v != 4) + abort (); + + __atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 5) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST); + if (v != 6) + abort (); +} + + +void +test_sub() +{ + v = res = 20; + count = 0; + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != --res) + abort (); +} + +void +test_and () +{ + v = init; + + __atomic_and_fetch (&v, 0, __ATOMIC_RELAXED); + if (v != 0) + abort (); + + v = init; + __atomic_fetch_and (&v, init, __ATOMIC_CONSUME); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, init, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_nand () +{ + v = init; + + __atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, init, __ATOMIC_RELEASE); + if (v != 0) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST); + if (v != init) + abort (); +} + + + +void +test_xor () +{ + v = init; + count = 0; + + __atomic_xor_fetch (&v, count, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_or () +{ + v = 0; + count = 1; + + __atomic_or_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_CONSUME); + if (v != 3) + abort (); + + count *= 2; + __atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE); + if (v != 7) + abort (); + + count *= 2; + __atomic_fetch_or (&v, 8, __ATOMIC_RELEASE); + if (v != 15) + abort (); + + count *= 2; + __atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 31) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST); + if (v != 63) + abort (); +} + +main () +{ + test_fetch_add (); + test_fetch_sub (); + test_fetch_and (); + test_fetch_nand (); + test_fetch_xor (); + test_fetch_or (); + + test_add_fetch (); + test_sub_fetch 
(); + test_and_fetch (); + test_nand_fetch (); + test_xor_fetch (); + test_or_fetch (); + + test_add (); + test_sub (); + test_and (); + test_nand (); + test_xor (); + test_or (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-op-4.c b/gcc/testsuite/gcc.dg/atomic-op-4.c new file mode 100644 index 0000000..3965021 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-op-4.c @@ -0,0 +1,555 @@ +/* Test __atomic routines for existence and proper execution on 8 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ + +/* Test the execution of the __atomic_*OP builtin routines for long long. */ + +extern void abort(void); + +long long v, count, res; +const long long init = ~0; + +/* The fetch_op routines return the original value before the operation. */ + +void +test_fetch_add () +{ + v = 0; + count = 1; + + if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5) + abort (); +} + + +void +test_fetch_sub() +{ + v = res = 20; + count = 0; + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--) + abort (); +} + +void +test_fetch_and () +{ + v = init; + + if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_fetch_nand () +{ + v = init; + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 ) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_xor () +{ + v = init; + count = 0; + + if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_or () +{ + v = 0; + count = 1; + + if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + count *= 2; + if 
(__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31) + abort (); +} + +/* The OP_fetch routines return the new value after the operation. */ + +void +test_add_fetch () +{ + v = 0; + count = 1; + + if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6) + abort (); +} + + +void +test_sub_fetch () +{ + v = res = 20; + count = 0; + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res) + abort (); +} + +void +test_and_fetch () +{ + v = init; + + if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0) + abort (); + + v = init; + if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_nand_fetch () +{ + v = init; + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + + + +void +test_xor_fetch () +{ + v = init; + count = 0; + + if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_or_fetch () +{ + v = 0; + count = 1; + + if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31) + abort (); + + count *= 2; + if 
(__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63) + abort (); +} + + +/* Test the OP routines with a result which isn't used. Use both variations + within each function. */ + +void +test_add () +{ + v = 0; + count = 1; + + __atomic_add_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_CONSUME); + if (v != 2) + abort (); + + __atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE); + if (v != 3) + abort (); + + __atomic_fetch_add (&v, 1, __ATOMIC_RELEASE); + if (v != 4) + abort (); + + __atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 5) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST); + if (v != 6) + abort (); +} + + +void +test_sub() +{ + v = res = 20; + count = 0; + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != --res) + abort (); +} + +void +test_and () +{ + v = init; + + __atomic_and_fetch (&v, 0, __ATOMIC_RELAXED); + if (v != 0) + abort (); + + v = init; + __atomic_fetch_and (&v, init, __ATOMIC_CONSUME); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, init, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_nand () +{ + v = init; + + __atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, init, __ATOMIC_RELEASE); + if (v != 0) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST); + if (v != init) + abort (); +} + + + +void +test_xor () +{ + v = init; + count = 0; + + __atomic_xor_fetch (&v, count, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_or () +{ + v = 0; + count = 1; + + __atomic_or_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_CONSUME); + if (v != 3) + abort (); + + count *= 2; + __atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE); + if (v != 7) + abort (); + + count *= 2; + __atomic_fetch_or (&v, 8, __ATOMIC_RELEASE); + if (v != 15) + abort (); + + count *= 2; + __atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 31) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST); + if (v != 63) + abort (); +} + +main () +{ + test_fetch_add (); + test_fetch_sub (); + test_fetch_and (); + test_fetch_nand 
(); + test_fetch_xor (); + test_fetch_or (); + + test_add_fetch (); + test_sub_fetch (); + test_and_fetch (); + test_nand_fetch (); + test_xor_fetch (); + test_or_fetch (); + + test_add (); + test_sub (); + test_and (); + test_nand (); + test_xor (); + test_or (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic-op-5.c b/gcc/testsuite/gcc.dg/atomic-op-5.c new file mode 100644 index 0000000..2ca71ad --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-op-5.c @@ -0,0 +1,555 @@ +/* Test __atomic routines for existence and proper execution on 16 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_128 } */ +/* { dg-options "-mcx16" { target { x86_64-*-* } } } */ + +/* Test the execution of the __atomic_*OP builtin routines for an int_128. */ + +extern void abort(void); + +__int128_t v, count, res; +const __int128_t init = ~0; + +/* The fetch_op routines return the original value before the operation. */ + +void +test_fetch_add () +{ + v = 0; + count = 1; + + if (__atomic_fetch_add (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_CONSUME) != 1) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQUIRE) != 2) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_RELEASE) != 3) + abort (); + + if (__atomic_fetch_add (&v, count, __ATOMIC_ACQ_REL) != 4) + abort (); + + if (__atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST) != 5) + abort (); +} + + +void +test_fetch_sub() +{ + v = res = 20; + count = 0; + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_RELAXED) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_CONSUME) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQUIRE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE) != res--) + abort (); + + if (__atomic_fetch_sub (&v, count + 1, __ATOMIC_ACQ_REL) != res--) + abort (); + + if (__atomic_fetch_sub (&v, 1, __ATOMIC_SEQ_CST) != res--) + abort (); +} + +void +test_fetch_and () +{ + v = init; + + if (__atomic_fetch_and (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_and (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_fetch_and (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_fetch_nand () +{ + v = init; + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_ACQUIRE) != 0 ) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL) != 0) + abort (); + + if (__atomic_fetch_nand (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_xor () +{ + v = init; + count = 0; + + if (__atomic_fetch_xor (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_fetch_xor (&v, ~count, __ATOMIC_SEQ_CST) != init) + abort (); +} + +void +test_fetch_or () +{ + v = 0; 
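+  /* There is no single 16-byte atomic OR instruction on common targets;
+     with -mcx16 on x86_64 this is typically expanded as a cmpxchg16b
+     compare-and-swap loop, so the expected results match the narrower
+     variants of this test.  */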
+ count = 1; + + if (__atomic_fetch_or (&v, count, __ATOMIC_RELAXED) != 0) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 2, __ATOMIC_CONSUME) != 1) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, 8, __ATOMIC_RELEASE) != 7) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_ACQ_REL) != 15) + abort (); + + count *= 2; + if (__atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST) != 31) + abort (); +} + +/* The OP_fetch routines return the new value after the operation. */ + +void +test_add_fetch () +{ + v = 0; + count = 1; + + if (__atomic_add_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_CONSUME) != 2) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQUIRE) != 3) + abort (); + + if (__atomic_add_fetch (&v, 1, __ATOMIC_RELEASE) != 4) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL) != 5) + abort (); + + if (__atomic_add_fetch (&v, count, __ATOMIC_SEQ_CST) != 6) + abort (); +} + + +void +test_sub_fetch () +{ + v = res = 20; + count = 0; + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_CONSUME) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQUIRE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, 1, __ATOMIC_RELEASE) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL) != --res) + abort (); + + if (__atomic_sub_fetch (&v, count + 1, __ATOMIC_SEQ_CST) != --res) + abort (); +} + +void +test_and_fetch () +{ + v = init; + + if (__atomic_and_fetch (&v, 0, __ATOMIC_RELAXED) != 0) + abort (); + + v = init; + if (__atomic_and_fetch (&v, init, __ATOMIC_CONSUME) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, init, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL) != 0) + abort (); + + v = ~v; + if (__atomic_and_fetch (&v, 0, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_nand_fetch () +{ + v = init; + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE) != init) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_RELEASE) != 0) + abort (); + + if (__atomic_nand_fetch (&v, init, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST) != init) + abort (); +} + + + +void +test_xor_fetch () +{ + v = init; + count = 0; + + if (__atomic_xor_fetch (&v, count, __ATOMIC_RELAXED) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_CONSUME) != 0) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE) != 0) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_RELEASE) != init) + abort (); + + if (__atomic_xor_fetch (&v, 0, __ATOMIC_ACQ_REL) != init) + abort (); + + if (__atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST) != 0) + abort (); +} + +void +test_or_fetch () +{ + v = 0; + count = 1; + + if (__atomic_or_fetch (&v, count, __ATOMIC_RELAXED) != 1) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 2, __ATOMIC_CONSUME) != 3) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQUIRE) != 7) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, 8, __ATOMIC_RELEASE) != 15) + abort (); + 
+ count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL) != 31) + abort (); + + count *= 2; + if (__atomic_or_fetch (&v, count, __ATOMIC_SEQ_CST) != 63) + abort (); +} + + +/* Test the OP routines with a result which isn't used. Use both variations + within each function. */ + +void +test_add () +{ + v = 0; + count = 1; + + __atomic_add_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_CONSUME); + if (v != 2) + abort (); + + __atomic_add_fetch (&v, 1 , __ATOMIC_ACQUIRE); + if (v != 3) + abort (); + + __atomic_fetch_add (&v, 1, __ATOMIC_RELEASE); + if (v != 4) + abort (); + + __atomic_add_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 5) + abort (); + + __atomic_fetch_add (&v, count, __ATOMIC_SEQ_CST); + if (v != 6) + abort (); +} + + +void +test_sub() +{ + v = res = 20; + count = 0; + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_RELAXED); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_CONSUME); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, 1, __ATOMIC_ACQUIRE); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, 1, __ATOMIC_RELEASE); + if (v != --res) + abort (); + + __atomic_sub_fetch (&v, count + 1, __ATOMIC_ACQ_REL); + if (v != --res) + abort (); + + __atomic_fetch_sub (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != --res) + abort (); +} + +void +test_and () +{ + v = init; + + __atomic_and_fetch (&v, 0, __ATOMIC_RELAXED); + if (v != 0) + abort (); + + v = init; + __atomic_fetch_and (&v, init, __ATOMIC_CONSUME); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, init, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_and_fetch (&v, 0, __ATOMIC_ACQ_REL); + if (v != 0) + abort (); + + v = ~v; + __atomic_fetch_and (&v, 0, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_nand () +{ + v = init; + + __atomic_fetch_nand (&v, 0, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, init, __ATOMIC_RELEASE); + if (v != 0) + abort (); + + __atomic_fetch_nand (&v, init, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_nand_fetch (&v, 0, __ATOMIC_SEQ_CST); + if (v != init) + abort (); +} + + + +void +test_xor () +{ + v = init; + count = 0; + + __atomic_xor_fetch (&v, count, __ATOMIC_RELAXED); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_CONSUME); + if (v != 0) + abort (); + + __atomic_xor_fetch (&v, 0, __ATOMIC_ACQUIRE); + if (v != 0) + abort (); + + __atomic_fetch_xor (&v, ~count, __ATOMIC_RELEASE); + if (v != init) + abort (); + + __atomic_fetch_xor (&v, 0, __ATOMIC_ACQ_REL); + if (v != init) + abort (); + + __atomic_xor_fetch (&v, ~count, __ATOMIC_SEQ_CST); + if (v != 0) + abort (); +} + +void +test_or () +{ + v = 0; + count = 1; + + __atomic_or_fetch (&v, count, __ATOMIC_RELAXED); + if (v != 1) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_CONSUME); + if (v != 3) + abort (); + + count *= 2; + __atomic_or_fetch (&v, 4, __ATOMIC_ACQUIRE); + if (v != 7) + abort (); + + count *= 2; + __atomic_fetch_or (&v, 8, __ATOMIC_RELEASE); + if (v != 15) + abort (); + + count *= 2; + __atomic_or_fetch (&v, count, __ATOMIC_ACQ_REL); + if (v != 31) + abort (); + + count *= 2; + __atomic_fetch_or (&v, count, __ATOMIC_SEQ_CST); + if (v != 63) + 
abort ();
+}
+
+main ()
+{
+  test_fetch_add ();
+  test_fetch_sub ();
+  test_fetch_and ();
+  test_fetch_nand ();
+  test_fetch_xor ();
+  test_fetch_or ();
+
+  test_add_fetch ();
+  test_sub_fetch ();
+  test_and_fetch ();
+  test_nand_fetch ();
+  test_xor_fetch ();
+  test_or_fetch ();
+
+  test_add ();
+  test_sub ();
+  test_and ();
+  test_nand ();
+  test_xor ();
+  test_or ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/atomic-param.c b/gcc/testsuite/gcc.dg/atomic-param.c
new file mode 100644
index 0000000..a1bfc6b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/atomic-param.c
@@ -0,0 +1,13 @@
+/* Test that the __atomic routines diagnose an incorrect number of
+   arguments.  This only needs to be tested on a single size.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target sync_int_long } */
+
+int i;
+
+main ()
+{
+
+  __atomic_exchange_n (&i, 1); /* { dg-error "too few arguments" } */
+  __atomic_exchange_n (&i, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); /* { dg-error "too many arguments" } */
+}
diff --git a/gcc/testsuite/gcc.dg/atomic-store-1.c b/gcc/testsuite/gcc.dg/atomic-store-1.c
new file mode 100644
index 0000000..f99eb9c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/atomic-store-1.c
@@ -0,0 +1,47 @@
+/* Test __atomic routines for existence and proper execution on 1 byte
+   values with each valid memory model.  */
+/* { dg-do run } */
+/* { dg-require-effective-target sync_char_short } */
+
+/* Test the execution of the __atomic_store_n builtin for a char.  */
+
+extern void abort(void);
+
+char v, count;
+
+main ()
+{
+  v = 0;
+  count = 0;
+
+  __atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
+  if (v != ++count)
+    abort ();
+
+  __atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
+  if (v != ++count)
+    abort ();
+
+  __atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
+  if (v != ++count)
+    abort ();
+
+  /* Now test the generic variant.  */
+  count++;
+
+  __atomic_store (&v, &count, __ATOMIC_RELAXED);
+  if (v != count++)
+    abort ();
+
+  __atomic_store (&v, &count, __ATOMIC_RELEASE);
+  if (v != count++)
+    abort ();
+
+  __atomic_store (&v, &count, __ATOMIC_SEQ_CST);
+  if (v != count)
+    abort ();
+
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/atomic-store-2.c b/gcc/testsuite/gcc.dg/atomic-store-2.c
new file mode 100644
index 0000000..da346fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/atomic-store-2.c
@@ -0,0 +1,46 @@
+/* Test __atomic routines for existence and proper execution on 2 byte
+   values with each valid memory model.  */
+/* { dg-do run } */
+/* { dg-require-effective-target sync_char_short } */
+
+/* Test the execution of the __atomic_store_n builtin for a short.  */
+
+extern void abort(void);
+
+short v, count;
+
+main ()
+{
+  v = 0;
+  count = 0;
+
+  __atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
+  if (v != ++count)
+    abort ();
+
+  __atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
+  if (v != ++count)
+    abort ();
+
+  __atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
+  if (v != ++count)
+    abort ();
+
+  /* Now test the generic variant.
*/ + count++; + + __atomic_store (&v, &count, __ATOMIC_RELAXED); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_RELEASE); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_SEQ_CST); + if (v != count) + abort (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-store-3.c b/gcc/testsuite/gcc.dg/atomic-store-3.c new file mode 100644 index 0000000..b691da4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-store-3.c @@ -0,0 +1,47 @@ +/* Test __atomic routines for existence and proper execution on 4 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ + +/* Test the execution of the __atomic_store_n builtin for an int. */ + +extern void abort(void); + +int v, count; + +main () +{ + v = 0; + count = 0; + + __atomic_store_n (&v, count + 1, __ATOMIC_RELAXED); + if (v != ++count) + abort (); + + __atomic_store_n (&v, count + 1, __ATOMIC_RELEASE); + if (v != ++count) + abort (); + + __atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != ++count) + abort (); + + /* Now test the generic variant. */ + count++; + + __atomic_store (&v, &count, __ATOMIC_RELAXED); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_RELEASE); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_SEQ_CST); + if (v != count) + abort (); + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-store-4.c b/gcc/testsuite/gcc.dg/atomic-store-4.c new file mode 100644 index 0000000..f77e183 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-store-4.c @@ -0,0 +1,48 @@ +/* Test __atomic routines for existence and proper execution on 8 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ + +/* Test the execution of the __atomic_store_n builtin for a long long. */ + +extern void abort(void); + +long long v, count; + +main () +{ + v = 0; + count = 0; + + __atomic_store_n (&v, count + 1, __ATOMIC_RELAXED); + if (v != ++count) + abort (); + + __atomic_store_n (&v, count + 1, __ATOMIC_RELEASE); + if (v != ++count) + abort (); + + __atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != ++count) + abort (); + + /* Now test the generic variant. */ + count++; + + __atomic_store (&v, &count, __ATOMIC_RELAXED); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_RELEASE); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_SEQ_CST); + if (v != count) + abort (); + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/atomic-store-5.c b/gcc/testsuite/gcc.dg/atomic-store-5.c new file mode 100644 index 0000000..f976a05 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic-store-5.c @@ -0,0 +1,48 @@ +/* Test __atomic routines for existence and proper execution on 16 byte + values with each valid memory model. */ +/* { dg-do run } */ +/* { dg-require-effective-target sync_int_128 } */ +/* { dg-options "-mcx16" { target { x86_64-*-* } } } */ + +/* Test the execution of the __atomic_store_n builtin for a 16 byte value. */ + +extern void abort(void); + +__int128_t v, count; + +main () +{ + v = 0; + count = 0; + + __atomic_store_n (&v, count + 1, __ATOMIC_RELAXED); + if (v != ++count) + abort (); + + __atomic_store_n (&v, count + 1, __ATOMIC_RELEASE); + if (v != ++count) + abort (); + + __atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST); + if (v != ++count) + abort (); + + /* Now test the generic variant. 
*/ + count++; + + __atomic_store (&v, &count, __ATOMIC_RELAXED); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_RELEASE); + if (v != count++) + abort (); + + __atomic_store (&v, &count, __ATOMIC_SEQ_CST); + if (v != count) + abort (); + + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/gomp/atomic-11.c b/gcc/testsuite/gcc.dg/gomp/atomic-11.c deleted file mode 100644 index b5647b0..0000000 --- a/gcc/testsuite/gcc.dg/gomp/atomic-11.c +++ /dev/null @@ -1,17 +0,0 @@ -/* PR middle-end/36877 */ -/* { dg-do compile } */ -/* { dg-options "-fopenmp" } */ -/* { dg-options "-fopenmp -march=i386" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ - -int i; -float f; - -void foo (void) -{ -#pragma omp atomic - i++; -#pragma omp atomic - f += 1.0; -} - -/* { dg-final { scan-assembler-not "__sync_(fetch|add|bool|val)" { target i?86-*-* x86_64-*-* powerpc*-*-* ia64-*-* s390*-*-* sparc*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/gomp.exp b/gcc/testsuite/gcc.dg/gomp/gomp.exp index e4f31cc..4cb4caf 100644 --- a/gcc/testsuite/gcc.dg/gomp/gomp.exp +++ b/gcc/testsuite/gcc.dg/gomp/gomp.exp @@ -29,8 +29,7 @@ if ![check_effective_target_fopenmp] { dg-init # Main loop. -dg-runtest [lsort [find $srcdir/$subdir *.c]] \ - "" "-fopenmp" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c $srcdir/c-c++-common/gomp/*.c]] "" "-fopenmp" # All done. dg-finish diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-int.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-int.c new file mode 100644 index 0000000..d03e831 --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-int.c @@ -0,0 +1,116 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_int_long } */ +/* { dg-final { simulate-thread } } */ + + +#include <stdio.h> +#include "simulate-thread.h" + + +/* Testing load for atomicity is a little trickier. + + Set up the atomic value so that it changes value after every instruction + is executed. + + Simply alternating between 2 values wouldn't be sufficient since a load of + one part, followed by the load of the second part 2 instructions later would + appear to be valid. + + set up a table of 16 values which change a bit in every byte of the value + each time, this will give us a 16 instruction cycle before repetition + kicks in, which should be sufficient to detect any issues. Just to be sure, + we also change the table cycle size during execution. + + The end result is that all loads should always get one of the values from + the table. Any other pattern means the load failed. */ + +unsigned int ret; +unsigned int value = 0; +unsigned int result = 0; +unsigned int table[16] = { +0x00000000, +0x11111111, +0x22222222, +0x33333333, +0x44444444, +0x55555555, +0x66666666, +0x77777777, +0x88888888, +0x99999999, +0xAAAAAAAA, +0xBBBBBBBB, +0xCCCCCCCC, +0xDDDDDDDD, +0xEEEEEEEE, +0xFFFFFFFF +}; + +int table_cycle_size = 16; + +/* Return 0 if 'result' is a valid value to have loaded. */ +int verify_result () +{ + int x; + int found = 0; + + /* Check entire table for valid values. */ + for (x = 0; x < 16 ; x++) + if (result == table[x]) + { + found = 1; + break; + } + + if (!found) + printf("FAIL: Invalid result returned from fetch\n"); + + return !found; +} + +/* Iterate VALUE through the different valid values. 
*/ +void simulate_thread_other_threads () +{ + static int current = 0; + + if (++current >= table_cycle_size) + current = 0; + value = table[current]; +} + +int simulate_thread_step_verify () +{ + return verify_result (); +} + +int simulate_thread_final_verify () +{ + return verify_result (); +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + int x; + + /* Execute loads with value changing at various cyclic values. */ + for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--) + { + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + /* In order to verify the returned value (which is not atomic), it needs + to be atomically stored into another variable and check that. */ + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + + /* Execute the fetch/store a couple of times just to ensure the cycles + have a chance to be interesting. */ + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + } +} + +main() +{ + simulate_thread_main (); + simulate_thread_done (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-int128.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-int128.c new file mode 100644 index 0000000..3ade0d6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-int128.c @@ -0,0 +1,132 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_int_128 } */ +/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */ +/* { dg-final { simulate-thread } } */ + +#include <stdio.h> +#include "simulate-thread.h" + + +/* Testing load for atomicity is a little trickier. + + Set up the atomic value so that it changes value after every instruction + is executed. + + Simply alternating between 2 values wouldn't be sufficient since a load of + one part, followed by the load of the second part 2 instructions later would + appear to be valid. + + set up a table of 16 values which change a bit in every byte of the value + each time, this will give us a 16 instruction cycle before repetition + kicks in, which should be sufficient to detect any issues. Just to be sure, + we also change the table cycle size during execution. + + The end result is that all loads should always get one of the values from + the table. Any other pattern means the load failed. */ + +__int128_t ret; +__int128_t value = 0; +__int128_t result = 0; +__int128_t table[16] = { +0x0000000000000000, +0x1111111111111111, +0x2222222222222222, +0x3333333333333333, +0x4444444444444444, +0x5555555555555555, +0x6666666666666666, +0x7777777777777777, +0x8888888888888888, +0x9999999999999999, +0xAAAAAAAAAAAAAAAA, +0xBBBBBBBBBBBBBBBB, +0xCCCCCCCCCCCCCCCC, +0xDDDDDDDDDDDDDDDD, +0xEEEEEEEEEEEEEEEE, +0xFFFFFFFFFFFFFFFF +}; + +int table_cycle_size = 16; + +/* Since we don't have 128 bit constants, we have to properly pad the table. */ +void fill_table() +{ + int x; + for (x = 0; x < 16; x++) + { + ret = table[x]; + ret = (ret << 64) | ret; + table[x] = ret; + } +} + +/* Return 0 if 'result' is a valid value to have loaded. */ +int verify_result () +{ + int x; + int found = 0; + + /* Check entire table for valid values. */ + for (x = 0; x < 16; x++) + if (result == table[x]) + { + found = 1; + break; + } + + if (!found) + printf("FAIL: Invalid result returned from fetch\n"); + + return !found; +} + +/* Iterate VALUE through the different valid values. 
*/ +void simulate_thread_other_threads () +{ + static int current = 0; + + if (++current >= table_cycle_size) + current = 0; + value = table[current]; +} + +int simulate_thread_step_verify () +{ + return verify_result (); +} + +int simulate_thread_final_verify () +{ + return verify_result (); +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + int x; + + /* Make sure value starts with an atomic value now. */ + __atomic_store_n (&value, ret, __ATOMIC_SEQ_CST); + + /* Execute loads with value changing at various cyclic values. */ + for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--) + { + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + /* In order to verify the returned value (which is not atomic), it needs + to be atomically stored into another variable and check that. */ + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + + /* Execute the fetch/store a couple of times just to ensure the cycles + have a chance to be interesting. */ + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + } +} + +main() +{ + fill_table (); + simulate_thread_main (); + simulate_thread_done (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-longlong.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-longlong.c new file mode 100644 index 0000000..8bc2eaa --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-longlong.c @@ -0,0 +1,117 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ +/* { dg-final { simulate-thread } } */ + + +#include <stdio.h> +#include "simulate-thread.h" + + +/* Testing load for atomicity is a little trickier. + + Set up the atomic value so that it changes value after every instruction + is executed. + + Simply alternating between 2 values wouldn't be sufficient since a load of + one part, followed by the load of the second part 2 instructions later would + appear to be valid. + + set up a table of 16 values which change a bit in every byte of the value + each time, this will give us a 16 instruction cycle before repetition + kicks in, which should be sufficient to detect any issues. Just to be sure, + we also change the table cycle size during execution. + + The end result is that all loads should always get one of the values from + the table. Any other pattern means the load failed. */ + +unsigned long long ret; +unsigned long long value = 0; +unsigned long long result = 0; +unsigned long long table[16] = { +0x0000000000000000, +0x1111111111111111, +0x2222222222222222, +0x3333333333333333, +0x4444444444444444, +0x5555555555555555, +0x6666666666666666, +0x7777777777777777, +0x8888888888888888, +0x9999999999999999, +0xAAAAAAAAAAAAAAAA, +0xBBBBBBBBBBBBBBBB, +0xCCCCCCCCCCCCCCCC, +0xDDDDDDDDDDDDDDDD, +0xEEEEEEEEEEEEEEEE, +0xFFFFFFFFFFFFFFFF +}; + +int table_cycle_size = 16; + +/* Return 0 if 'result' is a valid value to have loaded. */ +int verify_result () +{ + int x; + int found = 0; + + /* Check entire table for valid values. */ + for (x = 0; x < 16 ; x++) + if (result == table[x]) + { + found = 1; + break; + } + + if (!found) + printf("FAIL: Invalid result returned from fetch\n"); + + return !found; +} + +/* Iterate VALUE through the different valid values. 
*/ +void simulate_thread_other_threads () +{ + static int current = 0; + + if (++current >= table_cycle_size) + current = 0; + value = table[current]; +} + +int simulate_thread_step_verify () +{ + return verify_result (); +} + +int simulate_thread_final_verify () +{ + return verify_result (); +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + int x; + + /* Execute loads with value changing at various cyclic values. */ + for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--) + { + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + /* In order to verify the returned value (which is not atomic), it needs + to be atomically stored into another variable and check that. */ + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + + /* Execute the fetch/store a couple of times just to ensure the cycles + have a chance to be interesting. */ + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + } +} + +main() +{ + simulate_thread_main (); + simulate_thread_done (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-short.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-short.c new file mode 100644 index 0000000..e7b54c4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-load-short.c @@ -0,0 +1,116 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_char_short } */ +/* { dg-final { simulate-thread } } */ + + +#include <stdio.h> +#include "simulate-thread.h" + + +/* Testing load for atomicity is a little trickier. + + Set up the atomic value so that it changes value after every instruction + is executed. + + Simply alternating between 2 values wouldn't be sufficient since a load of + one part, followed by the load of the second part 2 instructions later would + appear to be valid. + + set up a table of 16 values which change a bit in every byte of the value + each time, this will give us a 16 instruction cycle before repetition + kicks in, which should be sufficient to detect any issues. Just to be sure, + we also change the table cycle size during execution. + + The end result is that all loads should always get one of the values from + the table. Any other pattern means the load failed. */ + +unsigned short ret; +unsigned short value = 0; +unsigned short result = 0; +unsigned short table[16] = { +0x0000, +0x1111, +0x2222, +0x3333, +0x4444, +0x5555, +0x6666, +0x7777, +0x8888, +0x9999, +0xAAAA, +0xBBBB, +0xCCCC, +0xDDDD, +0xEEEE, +0xFFFF +}; + +int table_cycle_size = 16; + +/* Return 0 if 'result' is a valid value to have loaded. */ +int verify_result () +{ + int x; + int found = 0; + + /* Check entire table for valid values. */ + for (x = 0; x < 16 ; x++) + if (result == table[x]) + { + found = 1; + break; + } + + if (!found) + printf("FAIL: Invalid result returned from fetch\n"); + + return !found; +} + +/* Iterate VALUE through the different valid values. */ +void simulate_thread_other_threads () +{ + static int current = 0; + + if (++current >= table_cycle_size) + current = 0; + value = table[current]; +} + +int simulate_thread_step_verify () +{ + return verify_result (); +} + +int simulate_thread_final_verify () +{ + return verify_result (); +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + int x; + + /* Execute loads with value changing at various cyclic values. 
*/ + for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--) + { + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + /* In order to verify the returned value (which is not atomic), it needs + to be atomically stored into another variable and check that. */ + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + + /* Execute the fetch/store a couple of times just to ensure the cycles + have a chance to be interesting. */ + ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST); + __atomic_store_n (&result, ret, __ATOMIC_SEQ_CST); + } +} + +main() +{ + simulate_thread_main (); + simulate_thread_done (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-int.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-int.c new file mode 100644 index 0000000..990310c --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-int.c @@ -0,0 +1,118 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_int_long } */ +/* { dg-final { simulate-thread } } */ + + +#include <stdio.h> +#include "simulate-thread.h" + +/* Test all the __sync routines for proper atomicity on 4 byte values. */ + +unsigned int zero = 0; +unsigned int max = ~0; + +unsigned int changing_value = 0; +unsigned int value = 0; +unsigned int ret; + +void test_abort() +{ + static int reported = 0; + if (!reported) + { + printf ("FAIL: improper execution of __sync builtin.\n"); + reported = 1; + } +} + +void simulate_thread_other_threads () +{ +} + +int simulate_thread_step_verify () +{ + if (value != zero && value != max) + { + printf ("FAIL: invalid intermediate result for value.\n"); + return 1; + } + return 0; +} + +int simulate_thread_final_verify () +{ + if (value != 0) + { + printf ("FAIL: invalid final result for value.\n"); + return 1; + } + return 0; +} + +/* All values written to 'value' alternate between 'zero' and + 'max'. Any other value detected by simulate_thread_step_verify() + between instructions would indicate that the value was only + partially written, and would thus fail this atomicity test. + + This function tests each different __atomic routine once, with + the exception of the load instruction which requires special + testing. 
*/
+__attribute__((noinline))
+void simulate_thread_main()
+{
+
+  ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
+  if (ret != zero || value != max)
+    test_abort();
+
+  __atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
+  if (value != zero)
+    test_abort();
+
+  ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != zero)
+    test_abort ();
+
+  ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != max)
+    test_abort ();
+
+  ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != zero)
+    test_abort ();
+
+  ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != max)
+    test_abort ();
+
+  ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != zero)
+    test_abort ();
+
+  ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != zero)
+    test_abort ();
+}
+
+main ()
+{
+  simulate_thread_main ();
+  simulate_thread_done ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-int128.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-int128.c
new file mode 100644
index 0000000..67f84a1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-int128.c
@@ -0,0 +1,116 @@
+/* { dg-do link } */
+/* { dg-require-effective-target sync_int_128 } */
+/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-final { simulate-thread } } */
+
+#include <stdio.h>
+#include "simulate-thread.h"
+
+/* Test all the __sync routines for proper atomicity on 16 byte values.  */
+
+__int128_t zero = 0;
+__int128_t max = ~0;
+__int128_t changing_value = 0;
+__int128_t value = 0;
+__int128_t ret;
+
+void test_abort()
+{
+  static int reported = 0;
+  if (!reported)
+    {
+      printf ("FAIL: improper execution of __sync builtin.\n");
+      reported = 1;
+    }
+}
+
+void simulate_thread_other_threads ()
+{
+}
+
+int simulate_thread_step_verify ()
+{
+  if (value != zero && value != max)
+    {
+      printf ("FAIL: invalid intermediate result for value.\n");
+      return 1;
+    }
+  return 0;
+}
+
+int simulate_thread_final_verify ()
+{
+  if (value != 0)
+    {
+      printf ("FAIL: invalid final result for value.\n");
+      return 1;
+    }
+  return 0;
+}
+
+/* All values written to 'value' alternate between 'zero' and 'max'.  Any other
+   value detected by simulate_thread_step_verify() between instructions would indicate
+   that the value was only partially written, and would thus fail this
+   atomicity test.
+
+   This function tests each different __atomic routine once, with the
+   exception of the load instruction which requires special testing.
*/ +__attribute__((noinline)) +void simulate_thread_main() +{ + + ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST); + if (ret != zero || value != max) + test_abort(); + + __atomic_store_n (&value, zero, __ATOMIC_SEQ_CST); + if (value != zero) + test_abort(); + + ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != zero) + test_abort (); + + ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != max) + test_abort (); + + ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != zero) + test_abort (); + + ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != max) + test_abort (); + + ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != zero) + test_abort (); + + ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != zero) + test_abort (); +} + +int main() +{ + simulate_thread_main (); + simulate_thread_done (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-longlong.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-longlong.c new file mode 100644 index 0000000..ac4330b --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-longlong.c @@ -0,0 +1,117 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_long_long } */ +/* { dg-options "" } */ +/* { dg-final { simulate-thread } } */ + + +#include <stdio.h> +#include "simulate-thread.h" + +/* Test all the __sync routines for proper atomicity on 8 byte values. */ + +unsigned long long zero = 0; +unsigned long long max = ~0; + +unsigned long long changing_value = 0; +unsigned long long value = 0; +unsigned long long ret; + +void test_abort() +{ + static int reported = 0; + if (!reported) + { + printf ("FAIL: improper execution of __sync builtin.\n"); + reported = 1; + } +} + +void simulate_thread_other_threads () +{ +} + +int simulate_thread_step_verify () +{ + if (value != zero && value != max) + { + printf ("FAIL: invalid intermediate result for value.\n"); + return 1; + } + return 0; +} + +int simulate_thread_final_verify () +{ + if (value != 0) + { + printf ("FAIL: invalid final result for value.\n"); + return 1; + } + return 0; +} + +/* All values written to 'value' alternate between 'zero' and 'max'. Any other + value detected by simulate_thread_step_verify() between instructions would indicate + that the value was only partially written, and would thus fail this + atomicity test. + + This function tests each different __atomic routine once, with the + exception of the load instruction which requires special testing. 
*/ +__attribute__((noinline)) +void simulate_thread_main() +{ + ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST); + if (ret != zero || value != max) + test_abort(); + + __atomic_store_n (&value, zero, __ATOMIC_SEQ_CST); + if (value != zero) + test_abort(); + + ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != zero) + test_abort (); + + ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != max) + test_abort (); + + ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != zero) + test_abort (); + + ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != max) + test_abort (); + + ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != zero) + test_abort (); + + ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != max || ret != max) + test_abort (); + + ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST); + if (value != zero || ret != zero) + test_abort (); +} + +int main () +{ + simulate_thread_main (); + simulate_thread_done (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-short.c b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-short.c new file mode 100644 index 0000000..d823e02 --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/atomic-other-short.c @@ -0,0 +1,117 @@ +/* { dg-do link } */ +/* { dg-require-effective-target sync_char_short } */ +/* { dg-final { simulate-thread } } */ + + +#include <stdio.h> +#include "simulate-thread.h" + +/* Test all the __sync routines for proper atomicity on 2 byte values. */ + +unsigned short zero = 0; +unsigned short max = ~0; + +unsigned short changing_value = 0; +unsigned short value = 0; +unsigned short ret; + +void test_abort() +{ + static int reported = 0; + if (!reported) + { + printf ("FAIL: improper execution of __sync builtin.\n"); + reported = 1; + } +} + +void simulate_thread_other_threads () +{ +} + +int simulate_thread_step_verify () +{ + if (value != zero && value != max) + { + printf ("FAIL: invalid intermediate result for value.\n"); + return 1; + } + return 0; +} + +int simulate_thread_final_verify () +{ + if (value != 0) + { + printf ("FAIL: invalid final result for value.\n"); + return 1; + } + return 0; +} + +/* All values written to 'value' alternate between 'zero' and + 'max'. Any other value detected by simulate_thread_step_verify() + between instructions would indicate that the value was only + partially written, and would thus fail this atomicity test. + + This function tests each different __atomic routine once, with + the exception of the load instruction which requires special + testing. 
*/
+__attribute__((noinline))
+void simulate_thread_main()
+{
+  ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
+  if (ret != zero || value != max)
+    test_abort();
+
+  __atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
+  if (value != zero)
+    test_abort();
+
+  ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != zero)
+    test_abort ();
+
+  ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != max)
+    test_abort ();
+
+  ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != zero)
+    test_abort ();
+
+  ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != max)
+    test_abort ();
+
+  ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != zero)
+    test_abort ();
+
+  ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != max || ret != max)
+    test_abort ();
+
+  ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
+  if (value != zero || ret != zero)
+    test_abort ();
+}
+
+int main ()
+{
+  simulate_thread_main ();
+  simulate_thread_done ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c
new file mode 100644
index 0000000..71d1cca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c
@@ -0,0 +1,58 @@
+/* { dg-do link } */
+/* { dg-options "--param allow-store-data-races=0" } */
+/* { dg-final { simulate-thread } } */
+
+#include <stdio.h>
+#include "simulate-thread.h"
+
+/* This file tests that speculative store movement out of a loop doesn't
+   happen.  This is disallowed when --param allow-store-data-races is 0.  */
+
+int global = 100;
+
+/* Other thread makes sure global is 100 before the next instruction is
+ * executed.  */
+void simulate_thread_other_threads()
+{
+  global = 100;
+}
+
+int simulate_thread_step_verify()
+{
+  if (global != 100)
+    {
+      printf("FAIL: global variable was assigned to.\n");
+      return 1;
+    }
+  return 0;
+}
+
+int simulate_thread_final_verify()
+{
+  return 0;
+}
+
+/* The variable global should never be assigned if test (0) is called.
+   This tests store movement out of a loop that is never executed.  */
+void test (int y)
+{
+  int x;
+  for (x = 0; x < y; x++)
+    {
+      global = y; /* This should never speculatively execute.  */
+    }
+}
+
+__attribute__((noinline))
+void simulate_thread_main()
+{
+  test(0);
+  simulate_thread_done();
+}
+
+__attribute__((noinline))
+int main()
+{
+  simulate_thread_main();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/simulate-thread/strict-align-global.c b/gcc/testsuite/gcc.dg/simulate-thread/strict-align-global.c
new file mode 100644
index 0000000..fdcd7f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/simulate-thread/strict-align-global.c
@@ -0,0 +1,52 @@
+/* { dg-do link } */
+/* { dg-options "--param allow-packed-store-data-races=0" } */
+/* { dg-final { simulate-thread } } */
+
+#include <stdio.h>
+#include "simulate-thread.h"
+
+/* This test verifies that writes to globals do not write to adjacent
+   globals.  This mostly happens on strict-align targets that are not
+   byte addressable (old Alphas, etc).
*/ + +char a = 0; +char b = 77; + +void simulate_thread_other_threads() +{ +} + +int simulate_thread_step_verify() +{ + if (b != 77) + { + printf("FAIL: Unexpected value. <b> is %d, should be 77\n", b); + return 1; + } + return 0; +} + +/* Verify that every variable has the correct value. */ +int simulate_thread_final_verify() +{ + int ret = simulate_thread_step_verify (); + if (a != 66) + { + printf("FAIL: Unexpected value. <a> is %d, should be 66\n", a); + return 1; + } + return ret; +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + a = 66; +} + +int main () +{ + simulate_thread_main(); + simulate_thread_done(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/simulate-thread/subfields.c b/gcc/testsuite/gcc.dg/simulate-thread/subfields.c new file mode 100644 index 0000000..2d93117 --- /dev/null +++ b/gcc/testsuite/gcc.dg/simulate-thread/subfields.c @@ -0,0 +1,93 @@ +/* { dg-do link } */ +/* { dg-options "--param allow-packed-store-data-races=0" } */ +/* { dg-final { simulate-thread } } */ + +#include <stdio.h> +#include "simulate-thread.h" + +/* This test verifies that data races aren't introduced by structure subfield + stores. */ + +struct test_struct { + char a; + char b; + char c; + char d; +} var = {0,0,0,0}; + + +/* This routine sets field a to 'x'. If executed properly, it will + not affect any of the other fields in the structure. An improper + implementation may load an entire word, change the 8 bits for field + 'a' and write the entire word back out. */ +__attribute__((noinline)) +void set_a(char x) +{ + var.a = x; +} + +static int global = 0; + +/* The other thread increments the value of each of the other fields + in the structure every cycle. If the store to the 'a' field does + an incorrect full or partial word load, mask and store, it will + write back an incorrect value to one or more of the other + fields. */ +void simulate_thread_other_threads() +{ + global++; + var.b = global; + var.c = global; + var.d = global; +} + + +/* Make sure that none of the other fields have been changed. */ +int simulate_thread_step_verify() +{ + int ret = 0; + if (var.b != global) + { + printf("FAIL: Unexpected value. var.b is %d, should be %d\n", + var.b, global); + ret = 1; + } + if (var.c != global) + { + printf("FAIL: Unexpected value. var.c is %d, should be %d\n", + var.c, global); + ret = 1; + } + if (var.d != global) + { + printf("FAIL: Unexpected value. var.d is %d, should be %d\n", + var.d, global); + ret = 1; + } + return ret; +} + +/* Verify that every variable has the correct value. */ +int simulate_thread_final_verify() +{ + int ret = simulate_thread_step_verify(); + if (var.a != 1) + { + printf("FAIL: Unexpected value. 
var.a is %d, should be %d\n", var.a, 1); + ret = 1; + } + return ret; +} + +__attribute__((noinline)) +void simulate_thread_main() +{ + set_a(1); +} + +int main () +{ + simulate_thread_main(); + simulate_thread_done(); + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1ba71f0..74a4c95 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -655,6 +655,28 @@ proc check_effective_target_tls_runtime {} { } [add_options_for_tls ""]] } +# Return 1 if atomic compare-and-swap is supported on 'int' + +proc check_effective_target_cas_char {} { + return [check_no_compiler_messages cas_char assembly { + #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 + #error unsupported + #endif + } ""] +} + +proc check_effective_target_cas_int {} { + return [check_no_compiler_messages cas_int assembly { + #if __INT_MAX__ == 0x7fff && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 + /* ok */ + #elif __INT_MAX__ == 0x7fffffff && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 + /* ok */ + #else + #error unsupported + #endif + } ""] +} + # Return 1 if -ffunction-sections is supported, 0 otherwise. proc check_effective_target_function_sections {} { @@ -3517,6 +3539,44 @@ proc check_effective_target_section_anchors { } { return $et_section_anchors_saved } +# Return 1 if the target supports atomic operations on "int_128" values. + +proc check_effective_target_sync_int_128 { } { + global et_sync_int_128_saved + + if [info exists et_sync_int_128_saved] { + verbose "check_effective_target_sync_int_128: using cached result" 2 + } else { + set et_sync_int_128_saved 0 + if { ([istarget x86_64-*-*] || [istarget i?86-*-*]) + && ![is-effective-target ia32] } { + set et_sync_int_128_saved 1 + } + } + + verbose "check_effective_target_sync_int_128: returning $et_sync_int_128_saved" 2 + return $et_sync_int_128_saved +} + +# Return 1 if the target supports atomic operations on "long long". + +proc check_effective_target_sync_long_long { } { + global et_sync_long_long_saved + + if [info exists et_sync_long_long_saved] { + verbose "check_effective_target_sync_long_long: using cached result" 2 + } else { + set et_sync_long_long_saved 0 + if { ([istarget x86_64-*-*] || [istarget i?86-*-*]) + && ![is-effective-target ia32] } { + set et_sync_long_long_saved 1 + } + } + + verbose "check_effective_target_sync_long_long: returning $et_sync_long_long_saved" 2 + return $et_sync_long_long_saved +} + # Return 1 if the target supports atomic operations on "int" and "long". proc check_effective_target_sync_int_long { } { |