From 641c2f8b69f799a00d0fda696d480e10505257c3 Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Tue, 22 Sep 2015 09:35:17 +0000 Subject: [AArch64] Use atomic load-operate instructions for fetch-update patterns. gcc/ 2015-09-22 Matthew Wahab * config/aarch64/aarch64-protos.h (aarch64_atomic_ldop_supported_p): Declare. * config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New. (enum aarch64_atomic_load_op_code): New. (aarch64_emit_atomic_load_op): New. (aarch64_gen_atomic_ldop): Update to support load-operate patterns. * config/aarch64/atomics.md (atomic_): Change to an expander. (aarch64_atomic_): New. (aarch64_atomic__lse): New. (atomic_fetch_): Change to an expander. (aarch64_atomic_fetch_): New. (aarch64_atomic_fetch__lse): New. gcc/testsuite/ 2015-09-22 Matthew Wahab * gcc.target/aarch64/atomic-inst-ldadd.c: New. * gcc.target/aarch64/atomic-inst-ldlogic.c: New. From-SVN: r228001 --- gcc/ChangeLog | 17 ++ gcc/config/aarch64/aarch64-protos.h | 2 + gcc/config/aarch64/aarch64.c | 175 ++++++++++++++++++++- gcc/config/aarch64/atomics.md | 101 ++++++++++-- gcc/testsuite/ChangeLog | 5 + .../gcc.target/aarch64/atomic-inst-ldadd.c | 58 +++++++ .../gcc.target/aarch64/atomic-inst-ldlogic.c | 109 +++++++++++++ 7 files changed, 455 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 27e773b..ad6ec7f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,22 @@ 2015-09-22 Matthew Wahab + * config/aarch64/aarch64-protos.h + (aarch64_atomic_ldop_supported_p): Declare. + * config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New. + (enum aarch64_atomic_load_op_code): New. + (aarch64_emit_atomic_load_op): New. + (aarch64_gen_atomic_ldop): Update to support load-operate + patterns. + * config/aarch64/atomics.md (atomic_): Change + to an expander. + (aarch64_atomic_): New. 
+ (aarch64_atomic__lse): New. + (atomic_fetch_): Change to an expander. + (aarch64_atomic_fetch_): New. + (aarch64_atomic_fetch__lse): New. + +2015-09-22 Matthew Wahab + * config/aarch64/aarch64/atomics.md (UNSPECV_ATOMIC_LDOP): New. (UNSPECV_ATOMIC_LDOP_OR): New. (UNSPECV_ATOMIC_LDOP_BIC): New. diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index eba4c76..76ebd6f 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx); void aarch64_expand_compare_and_swap (rtx op[]); void aarch64_split_compare_and_swap (rtx op[]); void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); + +bool aarch64_atomic_ldop_supported_p (enum rtx_code); void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx); void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 34c630b..93e36b7 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -10871,6 +10871,32 @@ aarch64_expand_compare_and_swap (rtx operands[]) emit_insn (gen_rtx_SET (bval, x)); } +/* Test whether the target supports using an atomic load-operate instruction. + CODE is the rtx code of the operation to perform on the memory location. + Returns TRUE if the target has LSE and CODE is one of SET, AND, IOR, XOR, + MINUS or PLUS. Returns FALSE if the operation isn't supported by the + architecture. */ + +bool +aarch64_atomic_ldop_supported_p (enum rtx_code code) +{ + if (!TARGET_LSE) + return false; + + switch (code) + { + case SET: + case AND: + case IOR: + case XOR: + case MINUS: + case PLUS: + return true; + default: + return false; + } +} + /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a sequence implementing an atomic operation. 
*/ @@ -11013,26 +11039,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value, emit_insn (gen (dst, mem, value, model)); } -/* Emit an atomic operation where the architecture supports it. */ +/* Operations supported by aarch64_emit_atomic_load_op. */ + +enum aarch64_atomic_load_op_code +{ + AARCH64_LDOP_PLUS, /* A + B */ + AARCH64_LDOP_XOR, /* A ^ B */ + AARCH64_LDOP_OR, /* A | B */ + AARCH64_LDOP_BIC /* A & ~B */ +}; + +/* Emit an atomic load-operate. */ + +static void +aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code, + machine_mode mode, rtx dst, rtx src, + rtx mem, rtx model) +{ + typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx); + const aarch64_atomic_load_op_fn plus[] = + { + gen_aarch64_atomic_loadaddqi, + gen_aarch64_atomic_loadaddhi, + gen_aarch64_atomic_loadaddsi, + gen_aarch64_atomic_loadadddi + }; + const aarch64_atomic_load_op_fn eor[] = + { + gen_aarch64_atomic_loadeorqi, + gen_aarch64_atomic_loadeorhi, + gen_aarch64_atomic_loadeorsi, + gen_aarch64_atomic_loadeordi + }; + const aarch64_atomic_load_op_fn ior[] = + { + gen_aarch64_atomic_loadsetqi, + gen_aarch64_atomic_loadsethi, + gen_aarch64_atomic_loadsetsi, + gen_aarch64_atomic_loadsetdi + }; + const aarch64_atomic_load_op_fn bic[] = + { + gen_aarch64_atomic_loadclrqi, + gen_aarch64_atomic_loadclrhi, + gen_aarch64_atomic_loadclrsi, + gen_aarch64_atomic_loadclrdi + }; + aarch64_atomic_load_op_fn gen; + int idx = 0; + + switch (mode) + { + case QImode: idx = 0; break; + case HImode: idx = 1; break; + case SImode: idx = 2; break; + case DImode: idx = 3; break; + default: + gcc_unreachable (); + } + + switch (code) + { + case AARCH64_LDOP_PLUS: gen = plus[idx]; break; + case AARCH64_LDOP_XOR: gen = eor[idx]; break; + case AARCH64_LDOP_OR: gen = ior[idx]; break; + case AARCH64_LDOP_BIC: gen = bic[idx]; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, mem, src, model)); +} + +/* Emit an atomic load+operate. CODE is the operation. 
OUT_DATA is the + location to store the data read from memory. MEM is the memory location to + read and modify. MODEL_RTX is the memory ordering to use. VALUE is the + second operand for the operation. OUT_DATA is also used as scratch space + when VALUE has to be copied or modified, so callers are expected to supply it. */ void aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx mem, rtx value, rtx model_rtx) { machine_mode mode = GET_MODE (mem); + machine_mode wmode = (mode == DImode ? DImode : SImode); + const bool short_mode = (mode < SImode); + aarch64_atomic_load_op_code ldop_code; + rtx src; + rtx x; + + if (out_data) + out_data = gen_lowpart (mode, out_data); - out_data = gen_lowpart (mode, out_data); + /* Make sure the value is in a register, putting it into a destination + register if it needs to be manipulated. */ + if (!register_operand (value, mode) + || code == AND || code == MINUS) + { + src = out_data; + emit_move_insn (src, gen_lowpart (mode, value)); + } + else + src = value; + gcc_assert (register_operand (src, mode)); + /* Preprocess the data for the operation as necessary. If the operation is + a SET then emit a swap instruction and finish. */ switch (code) { case SET: - aarch64_emit_atomic_swap (mode, out_data, value, mem, model_rtx); + aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx); return; + case MINUS: + /* Negate the value and treat it as a PLUS. */ + { + rtx neg_src; + + /* Resize the value if necessary. */ + if (short_mode) + src = gen_lowpart (wmode, src); + + neg_src = gen_rtx_NEG (wmode, src); + emit_insn (gen_rtx_SET (src, neg_src)); + + if (short_mode) + src = gen_lowpart (mode, src); + } + /* Fall-through. */ + case PLUS: + ldop_code = AARCH64_LDOP_PLUS; + break; + + case IOR: + ldop_code = AARCH64_LDOP_OR; + break; + + case XOR: + ldop_code = AARCH64_LDOP_XOR; + break; + + case AND: + { + rtx not_src; + + /* Resize the value if necessary. 
*/ + if (short_mode) + src = gen_lowpart (wmode, src); + + not_src = gen_rtx_NOT (wmode, src); + emit_insn (gen_rtx_SET (src, not_src)); + + if (short_mode) + src = gen_lowpart (mode, src); + } + ldop_code = AARCH64_LDOP_BIC; + break; + default: /* The operation can't be done with atomic instructions. */ gcc_unreachable (); } + + aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx); } /* Split an atomic operation. */ diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 11a9d13..e0d8856 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -225,23 +225,63 @@ } ) -(define_insn_and_split "atomic_<atomic_optab><mode>" +(define_expand "atomic_<atomic_optab><mode>" + [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") + (atomic_op:ALLI + (match_operand:ALLI 1 "" "") + (match_operand:SI 2 "const_int_operand"))] + "" + { + rtx (*gen) (rtx, rtx, rtx); + + /* Use an atomic load-operate instruction when possible. */ + if (aarch64_atomic_ldop_supported_p (<CODE>)) + gen = gen_aarch64_atomic_<atomic_optab><mode>_lse; + else + gen = gen_aarch64_atomic_<atomic_optab><mode>; + + emit_insn (gen (operands[0], operands[1], operands[2])); + + DONE; + } +) + +(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>" + [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") + (unspec_volatile:ALLI + [(atomic_op:ALLI (match_dup 0) + (match_operand:ALLI 1 "" "r")) + (match_operand:SI 2 "const_int_operand")] + UNSPECV_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:ALLI 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + } +) + +(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse" [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") (unspec_volatile:ALLI [(atomic_op:ALLI (match_dup 0) (match_operand:ALLI 1 "" "r")) - (match_operand:SI 2 "const_int_operand")] ;; model + (match_operand:SI 2 "const_int_operand")] 
UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:ALLI 3 "=&r")) - (clobber (match_scratch:SI 4 "=&r"))] - "" + (clobber (match_scratch:ALLI 3 "=&r"))] + "TARGET_LSE" "#" "&& reload_completed" [(const_int 0)] { - aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0], - operands[1], operands[2], operands[4]); + aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0], + operands[1], operands[2]); DONE; } ) @@ -268,7 +308,30 @@ } ) -(define_insn_and_split "atomic_fetch_<atomic_optab><mode>" +;; Load-operate-store, returning the original memory data. + +(define_expand "atomic_fetch_<atomic_optab><mode>" + [(match_operand:ALLI 0 "register_operand" "") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") + (atomic_op:ALLI + (match_operand:ALLI 2 "" "") + (match_operand:SI 3 "const_int_operand"))] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + + /* Use an atomic load-operate instruction when possible. */ + if (aarch64_atomic_ldop_supported_p (<CODE>)) + gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse; + else + gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>; + + emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); + + DONE; +}) + +(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>" [(set (match_operand:ALLI 0 "register_operand" "=&r") (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) (set (match_dup 1) @@ -291,6 +354,26 @@ } ) +(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse" + [(set (match_operand:ALLI 0 "register_operand" "=&r") + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) + (set (match_dup 1) + (unspec_volatile:ALLI + [(atomic_op:ALLI (match_dup 1) + (match_operand:ALLI 2 "" "r")) + (match_operand:SI 3 "const_int_operand")] + UNSPECV_ATOMIC_LDOP))] + "TARGET_LSE" + "#" + "&& reload_completed" + [(const_int 0)] + { + aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1], + operands[2], operands[3]); + DONE; + } +) + (define_insn_and_split "atomic_fetch_nand" [(set (match_operand:ALLI 0 "register_operand" "=&r") (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) diff 
--git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c0e467f..4c5364a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,10 @@ 2015-09-22 Matthew Wahab + * gcc.target/aarch64/atomic-inst-ldadd.c: New. + * gcc.target/aarch64/atomic-inst-ldlogic.c: New. + +2015-09-22 Matthew Wahab + * gcc.target/aarch64/atomic-inst-ops.inc: (TEST_MODEL): New. (TEST_ONE): New. * gcc.target/aarch64/atomic-inst-swap.c: New. diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c new file mode 100644 index 0000000..c21d2ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+lse" } */ + +/* Test ARMv8.1-A Load-ADD instruction. */ + +#include "atomic-inst-ops.inc" + +#define TEST TEST_ONE + +#define LOAD_ADD(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_fetch_add (val, foo, MODEL); \ + } + +#define LOAD_ADD_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_fetch_add (val, foo, MODEL); \ + } + +#define LOAD_SUB(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_fetch_sub (val, foo, MODEL); \ + } + +#define LOAD_SUB_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_fetch_sub (val, foo, MODEL); \ + } + + +TEST (load_add, LOAD_ADD) +TEST (load_add_notreturn, LOAD_ADD_NORETURN) + +TEST (load_sub, LOAD_SUB) +TEST (load_sub_notreturn, LOAD_SUB_NORETURN) + +/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */ +/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */ + +/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */ +/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */ 
+/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */ + +/* { dg-final { scan-assembler-times "ldadd\t" 16} } */ +/* { dg-final { scan-assembler-times "ldadda\t" 32} } */ +/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */ +/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */ + +/* { dg-final { scan-assembler-not "ldaxr\t" } } */ +/* { dg-final { scan-assembler-not "stlxr\t" } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c new file mode 100644 index 0000000..fd0f484 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c @@ -0,0 +1,109 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+lse" } */ + +/* Test ARMv8.1-A LD instruction. */ + +#include "atomic-inst-ops.inc" + +#define TEST TEST_ONE + +#define LOAD_OR(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_fetch_or (val, foo, MODEL); \ + } + +#define LOAD_OR_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_fetch_or (val, foo, MODEL); \ + } + +#define LOAD_AND(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_fetch_and (val, foo, MODEL); \ + } + +#define LOAD_AND_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_fetch_and (val, foo, MODEL); \ + } + +#define LOAD_XOR(FN, TY, MODEL) \ + TY FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + return __atomic_fetch_xor (val, foo, MODEL); \ + } + +#define LOAD_XOR_NORETURN(FN, TY, MODEL) \ + void FNNAME (FN, TY) (TY* val, TY* foo) \ + { \ + __atomic_fetch_xor (val, foo, MODEL); \ + } + + +TEST (load_or, LOAD_OR) +TEST (load_or_notreturn, LOAD_OR_NORETURN) + +TEST (load_and, LOAD_AND) +TEST (load_and_notreturn, LOAD_AND_NORETURN) + +TEST (load_xor, LOAD_XOR) +TEST (load_xor_notreturn, LOAD_XOR_NORETURN) + +/* Load-OR. 
*/ + +/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */ +/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */ +/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */ + +/* { dg-final { scan-assembler-times "ldseth\t" 4} } */ +/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */ +/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */ + +/* { dg-final { scan-assembler-times "ldset\t" 8} } */ +/* { dg-final { scan-assembler-times "ldseta\t" 16} } */ +/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */ +/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */ + +/* Load-AND. */ + +/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */ +/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */ +/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */ + +/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */ +/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */ +/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */ + +/* { dg-final { scan-assembler-times "ldclr\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclra\t" 16} } */ +/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */ +/* { dg-final { scan-assembler-times "ldclral\t" 16} } */ + +/* Load-XOR. 
*/ + +/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */ +/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */ +/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */ + +/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */ +/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */ +/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */ + +/* { dg-final { scan-assembler-times "ldeor\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeora\t" 16} } */ +/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */ +/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */ + +/* { dg-final { scan-assembler-not "ldaxr\t" } } */ +/* { dg-final { scan-assembler-not "stlxr\t" } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ -- cgit v1.1