author     Richard Henderson <richard.henderson@linaro.org>   2018-10-31 09:58:48 +0000
committer  Richard Henderson <rth@gcc.gnu.org>                2018-10-31 02:58:48 -0700
commit     7803ec5ee2a547043fb6708a08ddb1361ba91202 (patch)
tree       dcf5f28df3603081cc04cbce0864e469f2e8d39c
parent     8f5603d363a4e0453d2c38c7103aeb0bdca85c4e (diff)
aarch64: Improve atomic-op lse generation
Fix constraints; avoid unnecessary split.  Drop the use of the atomic_op
iterator in favor of the ATOMIC_LDOP iterator; this is simpler and more
logical for ldclr aka bic.

	* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
	(aarch64_atomic_ldop_supported_p): Remove.
	(aarch64_gen_atomic_ldop): Remove.
	* config/aarch64/atomics.md (atomic_<atomic_optab><ALLI>):
	Fully expand LSE operations here.
	(atomic_fetch_<atomic_optab><ALLI>): Likewise.
	(atomic_<atomic_optab>_fetch<ALLI>): Likewise.
	(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
	and use ATOMIC_LDOP instead; use register_operand for the input;
	drop the split and emit insns directly.
	(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
	(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
	(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.

From-SVN: r265660
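As an illustration of the new expansion (not part of the commit): with
-march=armv8.1-a (TARGET_LSE), an atomic fetch-and-AND is now expanded as a
complement of the mask followed by a single LDCLR, since LDCLR atomically
stores old & ~ws while returning the old value.

#include <stdint.h>

uint32_t
clear_bits (uint32_t *p, uint32_t mask)
{
  /* With LSE, GCC complements MASK and emits one LDCLRAL.  */
  return __atomic_fetch_and (p, mask, __ATOMIC_SEQ_CST);
}

/* Approximate expected assembly:
        mvn     w1, w1
        ldclral w1, w0, [x0]
        ret  */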
-rw-r--r--  gcc/ChangeLog                        |  14
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h  |   2
-rw-r--r--  gcc/config/aarch64/aarch64.c         | 176
-rw-r--r--  gcc/config/aarch64/atomics.md        | 197
-rw-r--r--  gcc/config/aarch64/iterators.md      |   5
5 files changed, 122 insertions, 272 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a3f9048..bec7124 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
2018-10-31 Richard Henderson <richard.henderson@linaro.org>
+ * config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
+ (aarch64_atomic_ldop_supported_p): Remove.
+ (aarch64_gen_atomic_ldop): Remove.
+ * config/aarch64/atomics.md (atomic_<atomic_optab><ALLI>):
+ Fully expand LSE operations here.
+ (atomic_fetch_<atomic_optab><ALLI>): Likewise.
+ (atomic_<atomic_optab>_fetch<ALLI>): Likewise.
+ (aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
+ and use ATOMIC_LDOP instead; use register_operand for the input;
+ drop the split and emit insns directly.
+ (aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
+ (aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
+ (@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.
+
* config/aarch64/aarch64.c (aarch64_emit_atomic_swap): Remove.
(aarch64_gen_atomic_ldop): Don't call it.
* config/aarch64/atomics.md (atomic_exchange<ALLI>):
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index f662533..288efe9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -564,8 +564,6 @@ rtx aarch64_load_tp (rtx);
void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
-bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e9829ab..e646cce 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14663,32 +14663,6 @@ aarch64_expand_compare_and_swap (rtx operands[])
emit_insn (gen_rtx_SET (bval, x));
}
-/* Test whether the target supports using a atomic load-operate instruction.
- CODE is the operation and AFTER is TRUE if the data in memory after the
- operation should be returned and FALSE if the data before the operation
- should be returned. Returns FALSE if the operation isn't supported by the
- architecture. */
-
-bool
-aarch64_atomic_ldop_supported_p (enum rtx_code code)
-{
- if (!TARGET_LSE)
- return false;
-
- switch (code)
- {
- case SET:
- case AND:
- case IOR:
- case XOR:
- case MINUS:
- case PLUS:
- return true;
- default:
- return false;
- }
-}
-
/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
sequence implementing an atomic operation. */
@@ -14821,156 +14795,6 @@ aarch64_split_compare_and_swap (rtx operands[])
aarch64_emit_post_barrier (model);
}
-/* Emit a BIC instruction. */
-
-static void
-aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
-{
- rtx shift_rtx = GEN_INT (shift);
- rtx (*gen) (rtx, rtx, rtx, rtx);
-
- switch (mode)
- {
- case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
- case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
- default:
- gcc_unreachable ();
- }
-
- emit_insn (gen (dst, s2, shift_rtx, s1));
-}
-
-/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
- location to store the data read from memory. OUT_RESULT is the location to
- store the result of the operation. MEM is the memory location to read and
- modify. MODEL_RTX is the memory ordering to use. VALUE is the second
- operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
- be NULL. */
-
-void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
- rtx mem, rtx value, rtx model_rtx)
-{
- machine_mode mode = GET_MODE (mem);
- machine_mode wmode = (mode == DImode ? DImode : SImode);
- const bool short_mode = (mode < SImode);
- int ldop_code;
- rtx src;
- rtx x;
-
- if (out_data)
- out_data = gen_lowpart (mode, out_data);
-
- if (out_result)
- out_result = gen_lowpart (mode, out_result);
-
- /* Make sure the value is in a register, putting it into a destination
- register if it needs to be manipulated. */
- if (!register_operand (value, mode)
- || code == AND || code == MINUS)
- {
- src = out_result ? out_result : out_data;
- emit_move_insn (src, gen_lowpart (mode, value));
- }
- else
- src = value;
- gcc_assert (register_operand (src, mode));
-
- /* Preprocess the data for the operation as necessary. If the operation is
- a SET then emit a swap instruction and finish. */
- switch (code)
- {
- case MINUS:
- /* Negate the value and treat it as a PLUS. */
- {
- rtx neg_src;
-
- /* Resize the value if necessary. */
- if (short_mode)
- src = gen_lowpart (wmode, src);
-
- neg_src = gen_rtx_NEG (wmode, src);
- emit_insn (gen_rtx_SET (src, neg_src));
-
- if (short_mode)
- src = gen_lowpart (mode, src);
- }
- /* Fall-through. */
- case PLUS:
- ldop_code = UNSPECV_ATOMIC_LDOP_PLUS;
- break;
-
- case IOR:
- ldop_code = UNSPECV_ATOMIC_LDOP_OR;
- break;
-
- case XOR:
- ldop_code = UNSPECV_ATOMIC_LDOP_XOR;
- break;
-
- case AND:
- {
- rtx not_src;
-
- /* Resize the value if necessary. */
- if (short_mode)
- src = gen_lowpart (wmode, src);
-
- not_src = gen_rtx_NOT (wmode, src);
- emit_insn (gen_rtx_SET (src, not_src));
-
- if (short_mode)
- src = gen_lowpart (mode, src);
- }
- ldop_code = UNSPECV_ATOMIC_LDOP_BIC;
- break;
-
- default:
- /* The operation can't be done with atomic instructions. */
- gcc_unreachable ();
- }
-
- emit_insn (gen_aarch64_atomic_load (ldop_code, mode,
- out_data, mem, src, model_rtx));
-
- /* If necessary, calculate the data in memory after the update by redoing the
- operation from values in registers. */
- if (!out_result)
- return;
-
- if (short_mode)
- {
- src = gen_lowpart (wmode, src);
- out_data = gen_lowpart (wmode, out_data);
- out_result = gen_lowpart (wmode, out_result);
- }
-
- x = NULL_RTX;
-
- switch (code)
- {
- case MINUS:
- case PLUS:
- x = gen_rtx_PLUS (wmode, out_data, src);
- break;
- case IOR:
- x = gen_rtx_IOR (wmode, out_data, src);
- break;
- case XOR:
- x = gen_rtx_XOR (wmode, out_data, src);
- break;
- case AND:
- aarch64_emit_bic (wmode, out_result, out_data, src, 0);
- return;
- default:
- gcc_unreachable ();
- }
-
- emit_set_insn (out_result, x);
-
- return;
-}
-
/* Split an atomic operation. */
void
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index bc9e396..2198649 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -207,13 +207,37 @@
rtx (*gen) (rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
- if (aarch64_atomic_ldop_supported_p (<CODE>))
- gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
+ if (TARGET_LSE)
+ {
+ switch (<CODE>)
+ {
+ case MINUS:
+ operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
+ NULL, 1);
+ /* fallthru */
+ case PLUS:
+ gen = gen_aarch64_atomic_add<mode>_lse;
+ break;
+ case IOR:
+ gen = gen_aarch64_atomic_ior<mode>_lse;
+ break;
+ case XOR:
+ gen = gen_aarch64_atomic_xor<mode>_lse;
+ break;
+ case AND:
+ operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
+ NULL, 1);
+ gen = gen_aarch64_atomic_bic<mode>_lse;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }
else
gen = gen_aarch64_atomic_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2]));
-
DONE;
}
)
@@ -239,22 +263,25 @@
}
)
-(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
+(define_insn "aarch64_atomic_<atomic_ldoptab><mode>_lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
- (unspec_volatile:ALLI
- [(atomic_op:ALLI (match_dup 0)
- (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
- (match_operand:SI 2 "const_int_operand")]
- UNSPECV_ATOMIC_OP))
+ (unspec_volatile:ALLI
+ [(match_dup 0)
+ (match_operand:ALLI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand")]
+ ATOMIC_LDOP))
(clobber (match_scratch:ALLI 3 "=&r"))]
"TARGET_LSE"
- "#"
- "&& reload_completed"
- [(const_int 0)]
{
- aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
- operands[1], operands[2]);
- DONE;
+ enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+ if (is_mm_relaxed (model))
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>1, %<w>3, %0";
+ else if (is_mm_release (model))
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>1, %<w>3, %0";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, %<w>3, %0";
+ else
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, %<w>3, %0";
}
)
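The template chooses the mnemonic suffix from the memory model: none for
relaxed, "l" for release, "a" for acquire/consume, and "al" for anything
stronger.  A hedged example (not from the commit); atomic_ldop maps IOR to
"set", so an acquire fetch-or should select LDSETA:

#include <stdint.h>

uint32_t
fetch_or_acquire (uint32_t *p, uint32_t v)
{
  return __atomic_fetch_or (p, v, __ATOMIC_ACQUIRE);
}

/* Approximate expected assembly:
        ldseta  w1, w0, [x0]
        ret  */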
@@ -280,7 +307,7 @@
}
)
-;; Load-operate-store, returning the updated memory data.
+;; Load-operate-store, returning the original memory data.
(define_expand "atomic_fetch_<atomic_optab><mode>"
[(match_operand:ALLI 0 "register_operand" "")
@@ -293,13 +320,37 @@
rtx (*gen) (rtx, rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
- if (aarch64_atomic_ldop_supported_p (<CODE>))
- gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
+ if (TARGET_LSE)
+ {
+ switch (<CODE>)
+ {
+ case MINUS:
+ operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
+ NULL, 1);
+ /* fallthru */
+ case PLUS:
+ gen = gen_aarch64_atomic_fetch_add<mode>_lse;
+ break;
+ case IOR:
+ gen = gen_aarch64_atomic_fetch_ior<mode>_lse;
+ break;
+ case XOR:
+ gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
+ break;
+ case AND:
+ operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
+ NULL, 1);
+ gen = gen_aarch64_atomic_fetch_bic<mode>_lse;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
else
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
-
DONE;
})
@@ -326,23 +377,26 @@
}
)
-(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+(define_insn "aarch64_atomic_fetch_<atomic_ldoptab><mode>_lse"
+ [(set (match_operand:ALLI 0 "register_operand" "=r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
- (unspec_volatile:ALLI
- [(atomic_op:ALLI (match_dup 1)
- (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
- (match_operand:SI 3 "const_int_operand")]
- UNSPECV_ATOMIC_LDOP))]
+ (unspec_volatile:ALLI
+ [(match_dup 1)
+ (match_operand:ALLI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand")]
+ ATOMIC_LDOP))]
"TARGET_LSE"
- "#"
- "&& reload_completed"
- [(const_int 0)]
{
- aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
- operands[2], operands[3]);
- DONE;
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_release (model))
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
}
)
@@ -370,7 +424,7 @@
}
)
-;; Load-operate-store, returning the original memory data.
+;; Load-operate-store, returning the updated memory data.
(define_expand "atomic_<atomic_optab>_fetch<mode>"
[(match_operand:ALLI 0 "register_operand" "")
@@ -380,17 +434,23 @@
(match_operand:SI 3 "const_int_operand")]
""
{
- rtx (*gen) (rtx, rtx, rtx, rtx);
- rtx value = operands[2];
-
- /* Use an atomic load-operate instruction when possible. */
- if (aarch64_atomic_ldop_supported_p (<CODE>))
- gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+ /* Use an atomic load-operate instruction when possible. In this case
+ we will re-compute the result from the original mem value. */
+ if (TARGET_LSE)
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ emit_insn (gen_atomic_fetch_<atomic_optab><mode>
+ (tmp, operands[1], operands[2], operands[3]));
+ tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+ operands[0], 1, OPTAB_WIDEN);
+ emit_move_insn (operands[0], tmp);
+ }
else
- gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
-
- emit_insn (gen (operands[0], operands[1], value, operands[3]));
-
+ {
+ emit_insn (gen_aarch64_atomic_<atomic_optab>_fetch<mode>
+ (operands[0], operands[1], operands[2], operands[3]));
+ }
DONE;
})
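LSE has no subtract forms and no instructions that return the updated value,
so MINUS is rewritten as PLUS of the negation (in the fetch expanders) and
the op-fetch expander above recomputes the new value from the old one.  A
sketch of the resulting code shape (illustrative, not from the commit):

#include <stdint.h>

uint64_t
sub_fetch (uint64_t *p, uint64_t v)
{
  return __atomic_sub_fetch (p, v, __ATOMIC_RELAXED);
}

/* Approximate expected assembly: negate, LDADD returning the old value,
   then recompute the new value in a register:
        neg     x2, x1
        ldadd   x2, x2, [x0]
        sub     x0, x2, x1
        ret  */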
@@ -417,29 +477,6 @@
}
)
-(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
- (atomic_op:ALLI
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
- (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SI 3 "const_int_operand")]
- UNSPECV_ATOMIC_LDOP))
- (clobber (match_scratch:ALLI 4 "=&r"))]
- "TARGET_LSE"
- "#"
- "&& reload_completed"
- [(const_int 0)]
- {
- aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
- operands[2], operands[3]);
- DONE;
- }
-)
-
(define_insn_and_split "atomic_nand_fetch<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(not:ALLI
@@ -585,29 +622,3 @@
return "dmb\\tish";
}
)
-
-;; ARMv8.1-A LSE instructions.
-
-;; Atomic load-op: Load data, operate, store result, keep data.
-
-(define_insn "@aarch64_atomic_load<atomic_ldop><mode>"
- [(set (match_operand:ALLI 0 "register_operand" "=r")
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(match_dup 1)
- (match_operand:ALLI 2 "register_operand")
- (match_operand:SI 3 "const_int_operand")]
- ATOMIC_LDOP))]
- "TARGET_LSE && reload_completed"
- {
- enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
- if (is_mm_relaxed (model))
- return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
- else if (is_mm_acquire (model) || is_mm_consume (model))
- return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
- else if (is_mm_release (model))
- return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
- else
- return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
- })
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a439560..524e4e6 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -503,7 +503,6 @@
UNSPECV_ATOMIC_CAS ; Represent an atomic CAS.
UNSPECV_ATOMIC_SWP ; Represent an atomic SWP.
UNSPECV_ATOMIC_OP ; Represent an atomic operation.
- UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation
UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or
UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic
UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor
@@ -1591,6 +1590,10 @@
[(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
(UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+(define_int_attr atomic_ldoptab
+ [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
+ (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+
;; -------------------------------------------------------------------
;; Int Iterators Attributes.
;; -------------------------------------------------------------------