about | summary | refs | log | tree | commit | diff
path: root/gcc/config
diff options
context:
space:
mode:
author Michael Meissner <meissner@linux.vnet.ibm.com> 2014-01-24 01:56:48 +0000
committer Michael Meissner <meissner@gcc.gnu.org> 2014-01-24 01:56:48 +0000
commit b846c948f29c4b56d6c14a509df4164049dabbd3 (patch)
tree 0db8047bb3fba3f07c9e23a6a12f99413bbfbd80 /gcc/config
parent 9eb3a1d30b85f2ec0e94b5ea4e1539be82ac51c2 (diff)
download gcc-b846c948f29c4b56d6c14a509df4164049dabbd3.zip
gcc-b846c948f29c4b56d6c14a509df4164049dabbd3.tar.gz
gcc-b846c948f29c4b56d6c14a509df4164049dabbd3.tar.bz2
re PR target/59909 (Quad memory bootstrap issues on little endian powerpc64 power8 systems)
[gcc] 2014-01-23 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/59909 * doc/invoke.texi (RS/6000 and PowerPC Options): Document -mquad-memory-atomic. Update -mquad-memory documentation to say it is only used for non-atomic loads/stores. * config/rs6000/predicates.md (quad_int_reg_operand): Allow either -mquad-memory or -mquad-memory-atomic switches. * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Add -mquad-memory-atomic to ISA 2.07 support. * config/rs6000/rs6000.opt (-mquad-memory-atomic): Add new switch to separate support of normal quad word memory operations (lq, stq) from the atomic quad word memory operations. * config/rs6000/rs6000.c (rs6000_option_override_internal): Add support to separate non-atomic quad word operations from atomic quad word operations. Disable non-atomic quad word operations in little endian mode so that we don't have to swap words after the load and before the store. (quad_load_store_p): Add comment about atomic quad word support. (rs6000_opt_masks): Add -mquad-memory-atomic to the list of options printed with -mdebug=reg. * config/rs6000/rs6000.h (TARGET_SYNC_TI): Use -mquad-memory-atomic as the test for whether we have quad word atomic instructions. (TARGET_SYNC_HI_QI): If either -mquad-memory-atomic, -mquad-memory, or -mp8-vector are used, allow byte/half-word atomic operations. * config/rs6000/sync.md (load_lockedti): Ensure that the address is a proper indexed or indirect address for the lqarx instruction. On little endian systems, swap the hi/lo registers after the lqarx instruction. (load_lockedpti): Use indexed_or_indirect_operand predicate to ensure the address is valid for the lqarx instruction. (store_conditionalti): Ensure that the address is a proper indexed or indirect address for the stqcx. instruction. On little endian systems, swap the hi/lo registers before doing the stqcx. instruction. (store_conditionalpti): Use indexed_or_indirect_operand predicate to ensure the address is valid for the stqcx. 
instruction. * gcc/config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define __QUAD_MEMORY__ and __QUAD_MEMORY_ATOMIC__ based on what type of quad memory support is available. [gcc/testsuite] 2014-01-23 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/59909 * gcc.target/powerpc/quad-atomic.c: New file to test power8 quad word atomic functions at runtime. From-SVN: r207020
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/rs6000/predicates.md 3
-rw-r--r-- gcc/config/rs6000/rs6000-c.c 4
-rw-r--r-- gcc/config/rs6000/rs6000-cpus.def 3
-rw-r--r-- gcc/config/rs6000/rs6000.c 29
-rw-r--r-- gcc/config/rs6000/rs6000.h 7
-rw-r--r-- gcc/config/rs6000/rs6000.opt 6
-rw-r--r-- gcc/config/rs6000/sync.md 76
7 files changed, 105 insertions, 23 deletions
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 0bfc85e..7b1121d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -270,7 +270,7 @@
{
HOST_WIDE_INT r;
- if (!TARGET_QUAD_MEMORY)
+ if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC)
return 0;
if (GET_CODE (op) == SUBREG)
@@ -624,6 +624,7 @@
(match_test "offsettable_nonstrict_memref_p (op)")))
;; Return 1 if the operand is suitable for load/store quad memory.
+;; This predicate only checks for non-atomic loads/stores.
(define_predicate "quad_memory_operand"
(match_code "mem")
{
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 2072b76..acdd4b4 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -339,6 +339,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
rs6000_define_or_undefine_macro (define_p, "__HTM__");
if ((flags & OPTION_MASK_P8_VECTOR) != 0)
rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
if ((flags & OPTION_MASK_CRYPTO) != 0)
rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index bf109a0..b17fd0d 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -53,7 +53,8 @@
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_HTM \
- | OPTION_MASK_QUAD_MEMORY)
+ | OPTION_MASK_QUAD_MEMORY \
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC)
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 75fa19e..867b8e8 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3357,14 +3357,37 @@ rs6000_option_override_internal (bool global_init_p)
/* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
silently turn off quad memory mode. */
- if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
warning (0, N_("-mquad-memory requires 64-bit mode"));
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC);
+ }
+
+ /* Non-atomic quad memory load/store are disabled for little endian, since
+ the words are reversed, but atomic operations can still be done by
+ swapping the words. */
+ if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory is not available in little endian mode"));
+
rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
}
+ /* Assume if the user asked for normal quad memory instructions, they want
+ the atomic versions as well, unless they explicitly told us not to use quad
+ word atomic instructions. */
+ if (TARGET_QUAD_MEMORY
+ && !TARGET_QUAD_MEMORY_ATOMIC
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
+ rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
+
/* Enable power8 fusion if we are tuning for power8, even if we aren't
generating power8 instructions. */
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
@@ -5940,7 +5963,8 @@ direct_move_p (rtx op0, rtx op1)
return false;
}
-/* Return true if this is a load or store quad operation. */
+/* Return true if this is a load or store quad operation. This function does
+ not handle the atomic quad memory instructions. */
bool
quad_load_store_p (rtx op0, rtx op1)
@@ -30754,6 +30778,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
{ "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
+ { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "string", OPTION_MASK_STRING, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 1a37a5d..5e30879 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -533,8 +533,11 @@ extern int rs6000_vector_align[];
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad
memory support. */
-#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
-#define TARGET_SYNC_TI TARGET_QUAD_MEMORY
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY \
+ || TARGET_QUAD_MEMORY_ATOMIC \
+ || TARGET_DIRECT_MOVE)
+
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY_ATOMIC
/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
to allocate the SDmode stack slot to get the value into the proper location
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3240a75..4c1a02a 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -571,7 +571,11 @@ Use ISA 2.07 transactional memory (HTM) instructions
mquad-memory
Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
-Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
+Generate the quad word memory instructions (lq/stq).
+
+mquad-memory-atomic
+Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags)
+Generate the quad word memory atomic instructions (lqarx/stqcx).
mcompat-align-parm
Target Report Var(rs6000_compat_align_parm) Init(0) Save
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index 45de1bd..7db4390 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -204,25 +204,46 @@
"<QHI:larx> %0,%y1"
[(set_attr "type" "load_l")])
-;; Use PTImode to get even/odd register pairs
+;; Use PTImode to get even/odd register pairs.
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions. Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
+;; On little endian systems where non-atomic quad word load/store instructions
+;; are not used, the address can be register+offset, so make sure the address
+;; is indexed or indirect before register allocation.
+
(define_expand "load_lockedti"
[(use (match_operand:TI 0 "quad_int_reg_operand" ""))
(use (match_operand:TI 1 "memory_operand" ""))]
"TARGET_SYNC_TI"
{
- /* Use a temporary register to force getting an even register for the
- lqarx/stqcrx. instructions. Normal optimizations will eliminate this
- extra copy. */
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
rtx pti = gen_reg_rtx (PTImode);
- emit_insn (gen_load_lockedpti (pti, operands[1]));
- emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx old_addr = XEXP (op1, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ }
+
+ emit_insn (gen_load_lockedpti (pti, op1));
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (op0, gen_lowpart (TImode, pti));
+ else
+ {
+ emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti));
+ emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti));
+ }
DONE;
})
(define_insn "load_lockedpti"
[(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
(unspec_volatile:PTI
- [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
"TARGET_SYNC_TI
&& !reg_mentioned_p (operands[0], operands[1])
&& quad_int_reg_operand (operands[0], PTImode)"
@@ -238,6 +259,14 @@
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions. Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
+;; On little endian systems where non-atomic quad word load/store instructions
+;; are not used, the address can be register+offset, so make sure the address
+;; is indexed or indirect before register allocation.
+
(define_expand "store_conditionalti"
[(use (match_operand:CC 0 "cc_reg_operand" ""))
(use (match_operand:TI 1 "memory_operand" ""))
@@ -247,21 +276,36 @@
rtx op0 = operands[0];
rtx op1 = operands[1];
rtx op2 = operands[2];
- rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
- rtx pti_op2 = gen_reg_rtx (PTImode);
-
- /* Use a temporary register to force getting an even register for the
- lqarx/stqcrx. instructions. Normal optimizations will eliminate this
- extra copy. */
- emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
- emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+ rtx addr = XEXP (op1, 0);
+ rtx pti_mem;
+ rtx pti_reg;
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx new_addr = force_reg (Pmode, addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ addr = new_addr;
+ }
+
+ pti_mem = change_address (op1, PTImode, addr);
+ pti_reg = gen_reg_rtx (PTImode);
+
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
+ else
+ {
+ emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op2));
+ emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op2));
+ }
+
+ emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));
DONE;
})
(define_insn "store_conditionalpti"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
- (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z")
(match_operand:PTI 2 "quad_int_reg_operand" "r"))]
"TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
"stqcx. %2,%y1"